diff --git a/.gitignore b/.gitignore index b88d654..9eb9194 100644 --- a/.gitignore +++ b/.gitignore @@ -28,4 +28,6 @@ multillidae* # IDEs .idea/ -.vscode/ \ No newline at end of file +.vscode/ +/trabalho_implementacao_01/questao12/build/ +/trabalho_implementacao_01/questao12/nbproject/private/ \ No newline at end of file diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/LICENSE.txt b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/LICENSE.txt new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/NOTICE.txt b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/NOTICE.txt new file mode 100644 index 0000000..950f3db --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/NOTICE.txt @@ -0,0 +1,17 @@ +Apache Commons Codec +Copyright 2002-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java +contains test data from http://aspell.net/test/orig/batch0.tab. +Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org) + +=============================================================================== + +The content of package org.apache.commons.codec.language.bm has been translated +from the original php source code available at http://stevemorse.org/phoneticinfo.htm +with permission from the original authors. +Original source copyright: +Copyright (c) 2008 Alexander Beider & Stephen P. Morse. 
diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/RELEASE-NOTES.txt b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/RELEASE-NOTES.txt new file mode 100644 index 0000000..fa24067 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/RELEASE-NOTES.txt @@ -0,0 +1,168 @@ + Apache Commons Codec 1.12 RELEASE NOTES + +The Apache Commons Codec team is pleased to announce the commons-codec-1.12 release! + +The Apache Commons Codec package contains simple encoder and decoders for + various formats such as Base64 and Hexadecimal. In addition to these + widely used encoders and decoders, the codec package also maintains a + collection of phonetic encoding utilities. + +Changes in this version include: + +New features: +o Add Percent-Encoding Codec (described in RFC3986 and RFC7578) Issue: CODEC-240. Thanks to Ioannis Sermetziadis. +o Add SHA-3 methods in DigestUtils Issue: CODEC-251. Thanks to Gary Gregory. + +Fixed Bugs: +o B64 salt generator: Random -> ThreadLocalRandom Issue: CODEC-252. +o Wrong value calculated by Cologne Phonetic if a special character is placed between equal letters Issue: CODEC-250. Thanks to Alex Volodko. +o ColognePhoneticTest.testIsEncodeEquals missing assertions Issue: CODEC-246. Thanks to Oscar Luis Vera Pérez. + +Changes: +o Update from Java 7 to Java 8 Issue: CODEC-253. + + +Have fun! +-Apache Commons Codec team + + +------------------------------------------------------------------------------- + + Apache Commons Codec 1.11 RELEASE NOTES + +The Apache Commons Codec team is pleased to announce the commons-codec-1.11-SNAPSHOT release! + +The Apache Commons Codec package contains simple encoder and decoders for + various formats such as Base64 and Hexadecimal. In addition to these + widely used encoders and decoders, the codec package also maintains a + collection of phonetic encoding utilities. + +Changes in this version include: + +New features: +o Add support for XXHash32 Issue: CODEC-241. 
+o Fluent interface for DigestUtils Issue: CODEC-220. +o Fluent interface for HmacUtils Issue: CODEC-222. +o Add support for CRC32-C Issue: CODEC-171. Thanks to Brett Okken. +o Add HmacAlgorithms.HMAC_SHA_224 (Java 8 only) Issue: CODEC-217. Thanks to Gary Gregory. +o Support JEP 287: SHA-3 Hash Algorithms Issue: CODEC-213. Thanks to Gary Gregory. +o Create a minimal Digest command line utility: org.apache.commons.codec.digest.Digest Issue: CODEC-212. Thanks to Gary Gregory. +o Add DigestUtils.getDigest(String, MessageDigest) Issue: CODEC-210. Thanks to Gary Gregory. +o Make some DigestUtils APIs public Issue: CODEC-208. Thanks to Gary Gregory. +o Add java.io.File APIs to MessageDigestAlgorithm Issue: CODEC-206. Thanks to Gary Gregory. +o BaseNCodecOutputStream only supports writing EOF on close() Issue: CODEC-183. Thanks to Steven Wurster. +o Support SHA-224 in DigestUtils on Java 8 Issue: CODEC-195. Thanks to Gary Gregory. +o Support java.nio.ByteBuffer in org.apache.commons.codec.binary.Hex Issue: CODEC-194. Thanks to Gary Gregory. +o Support java.nio.ByteBuffer in DigestUtils Issue: CODEC-193. Thanks to Michael Donaghy. +o Add BaseNCodec.encode(byte[], int, int) input with offset and length parameters for Base64 and Base32. Issue: CODEC-202. Thanks to Oleg Kalnichevski. +o Add convenience method decodeHex(String). Issue: CODEC-203. Thanks to Gary Gregory. +o Add faster CRC32 implementation. Issue: CODEC-205. Thanks to Gary Gregory. +o Add convenience API org.apache.commons.codec.binary.Hex.encodeHexString(byte[]|ByteBuffer, boolean). Issue: CODEC-224. Thanks to Gary Gregory. +o Add Automatic-Module-Name manifest entry for Java 9. Issue: CODEC-242. Thanks to Gary Gregory. + +Fixed Bugs: +o Base64.encodeBase64String could better use newStringUsAscii (ditto encodeBase64URLSafeString) Issue: CODEC-145. Thanks to Jesse Glick. +o BaseNCodec: encodeToString and encodeAsString methods are identical Issue: CODEC-144. 
+o URLCodec is neither immutable nor threadsafe Issue: CODEC-232. +o StringUtils.equals(CharSequence cs1, CharSequence cs2) can fail with String Index OBE Issue: CODEC-231. +o URLCodec.WWW_FORM_URL should be private Issue: CODEC-230. +o StringUtils.newStringxxx(null) should return null, not NPE Issue: CODEC-229. +o Fix minor resource leaks Issue: CODEC-225. Thanks to Svetlin Zarev. +o Base32.HEX_DECODE_TABLE contains the wrong value 32 Issue: CODEC-200. Thanks to Luciano Vernaschi. +o Charsets Javadoc breaks build when using Java 8 Issue: CODEC-207. Thanks to Gary Gregory. +o Bug in HW rule in Soundex Issue: CODEC-199. Thanks to Yossi Tamari. +o Javadoc for SHA-224 DigestUtils methods should mention Java 1.8.0 restriction instead of 1.4.0. Issue: CODEC-209. Thanks to Gary Gregory. +o Don't deprecate Charsets Charset constants in favor of Java 7's java.nio.charset.StandardCharsets Issue: CODEC-219. Thanks to Gary Gregory, Sebb. + +Changes: +o Base32.decode should support lowercase letters Issue: CODEC-234. Thanks to Christopher Schultz, Sebb. +o Soundex should support more algorithm variants Issue: CODEC-233. Thanks to Yossi Tamari. +o HmacUtils.updateHmac calls reset() unnecessarily Issue: CODEC-221. + +Removed: +o Drop obsolete Ant build Issue: CODEC-223. + +Have fun! +-Apache Commons Codec team + + +------------------------------------------------------------------------------- + + Apache Commons Codec 1.10 RELEASE NOTES + +The Apache Commons Codec team is pleased to announce the commons-codec-1.10 release! + +The Apache Commons Codec package contains simple encoder and decoders for + various formats such as Base64 and Hexadecimal. In addition to these + widely used encoders and decoders, the codec package also maintains a + collection of phonetic encoding utilities. + +This feature and fix release requires a minimum of Java 1.6, same as 1.9. + +Changes in this version include: + +New features: + +o Add Daitch-Mokotoff Soundex + Issue: CODEC-192. 
Thanks to Thomas Neidhart. +o QuotedPrintableCodec does not support soft line break per the 'quoted-printable' example on Wikipedia + Issue: CODEC-121. Thanks to Thomas Neidhart, Java John. +o Make possible to provide padding byte to BaseNCodec in constructor + Issue: CODEC-181. Thanks to Ivan Martinez-Ortiz. + +Fixed Bugs: + +o Added clarification to Javadoc of Base64 concerning the use of the urlSafe parameter + Issue: CODEC-185. Thanks to Sean Busbey. +o Added clarification to the Javadoc of Base[32|64]OutputStream that it is mandatory to call close() + Issue: CODEC-191. Thanks to Igor Savin. +o Add support for HMAC Message Authentication Code (MAC) digests + Issue: CODEC-188. Thanks to Hendrik Saly. +o Beider Morse Phonetic Matching producing incorrect tokens + Issue: CODEC-187. Thanks to Michael Tobias, Thomas Neidhart. +o NullPointerException in DoubleMetaPhone.isDoubleMetaphoneEqual when using empty strings + Issue: CODEC-184. Thanks to Cyrille Artho. +o Fix Javadoc 1.8.0 errors + Issue: CODEC-180. Thanks to Ville Skyttä. +o Fix Java 8 build Javadoc errors + Issue: CODEC-189. + +Changes: + +o Deprecate Charsets Charset constants in favor of Java 7's java.nio.charset.StandardCharsets + Issue: CODEC-178. +o Update from commons-parent 34 to 35 + Issue: CODEC-190. + + +Have fun! +-Apache Commons Codec team + + +------------------------------------------------------------------------------- + + Apache Commons Codec 1.9 RELEASE NOTES + +The codec package contains simple encoder and decoders for +various formats such as Base64 and Hexadecimal. In addition to these +widely used encoders and decoders, the codec package also maintains a +collection of phonetic encoding utilities. + +This feature and fix release requires a minimum of Java 1.6, same as 1.8. + +Changes in this version include: + +Performance: +o CODEC-174: Improve performance of Beider Morse encoder. Thanks to Thomas Champagne. 
+ +Fixed Bugs: +o CODEC-175: Beider Morse does not close Scanners used to read config files. +o CODEC-172: Base32 decode table has spurious value. Thanks to Matt Bishop. +o CODEC-170: Link broken in Metaphone Javadoc. Thanks to Ron Wheeler, Henri Yandell. +o CODEC-176: Spelling fixes in Javadoc and comments. Thanks to Ville Skyttä. + + +For complete information on Apache Commons Codec, including instructions on how to submit bug reports, +patches, or suggestions for improvement, see the Apache Commons Codec website: + +http://commons.apache.org/proper/commons-codec/ diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/allclasses-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/allclasses-frame.html new file mode 100644 index 0000000..e964a06 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/allclasses-frame.html @@ -0,0 +1,84 @@ + + + +
+ + +Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+ISO_8859_1 |
+"ISO-8859-1" |
+
+
+public static final String |
+US_ASCII |
+"US-ASCII" |
+
+
+public static final String |
+UTF_16 |
+"UTF-16" |
+
+
+public static final String |
+UTF_16BE |
+"UTF-16BE" |
+
+
+public static final String |
+UTF_16LE |
+"UTF-16LE" |
+
+
+public static final String |
+UTF_8 |
+"UTF-8" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+protected static final int |
+MASK_8BITS |
+255 |
+
+
+public static final int |
+MIME_CHUNK_SIZE |
+76 |
+
+
+protected final byte |
+PAD |
+61 |
+
+
+protected static final byte |
+PAD_DEFAULT |
+61 |
+
+
+public static final int |
+PEM_CHUNK_SIZE |
+64 |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+DEFAULT_CHARSET_NAME |
+"UTF-8" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+MD2 |
+"MD2" |
+
+
+public static final String |
+MD5 |
+"MD5" |
+
+
+public static final String |
+SHA_1 |
+"SHA-1" |
+
+
+public static final String |
+SHA_224 |
+"SHA-224" |
+
+
+public static final String |
+SHA_256 |
+"SHA-256" |
+
+
+public static final String |
+SHA_384 |
+"SHA-384" |
+
+
+public static final String |
+SHA_512 |
+"SHA-512" |
+
+
+public static final String |
+SHA3_224 |
+"SHA3-224" |
+
+
+public static final String |
+SHA3_256 |
+"SHA3-256" |
+
+
+public static final String |
+SHA3_384 |
+"SHA3-384" |
+
+
+public static final String |
+SHA3_512 |
+"SHA3-512" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+US_ENGLISH_MAPPING_STRING |
+"01360240043788015936020505" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final char |
+SILENT_MARKER |
+45 |
+
+
+public static final String |
+US_ENGLISH_MAPPING_STRING |
+"01230120022455012623010202" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+ANY |
+"any" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+public static final String |
+ALL |
+"ALL" |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+protected static final String |
+POSTFIX |
+"?=" |
+
+
+protected static final String |
+PREFIX |
+"=?" |
+
+
+protected static final char |
+SEP |
+63 |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+protected static final String |
+POSTFIX |
+"?=" |
+
+
+protected static final String |
+PREFIX |
+"=?" |
+
+
+protected static final char |
+SEP |
+63 |
+
Modifier and Type | +Constant Field | +Value | +
---|---|---|
+
+protected static final byte |
+ESCAPE_CHAR |
+37 |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/deprecated-list.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/deprecated-list.html new file mode 100644 index 0000000..951dfb2 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/deprecated-list.html @@ -0,0 +1,463 @@ + + + + + + +Class and Description | +
---|
org.apache.commons.codec.language.Caverphone
+ 1.5 Replaced by
+Caverphone2 , will be removed in 2.0. |
+
Field and Description | +
---|
org.apache.commons.codec.net.URLCodec.charset
+ TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126)
+ |
+
org.apache.commons.codec.binary.BaseNCodec.PAD
+ Use
+BaseNCodec.pad . Will be removed in 2.0. |
+
org.apache.commons.codec.net.URLCodec.WWW_FORM_URL
+ 1.11 Will be removed in 2.0 (CODEC-230)
+ |
+
Method and Description | +
---|
org.apache.commons.codec.net.URLCodec.getEncoding()
+ Use
+URLCodec.getDefaultCharset() , will be removed in 2.0. |
+
org.apache.commons.codec.digest.HmacUtils.getHmacMd5(byte[])
+ (1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_MD5, byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.getHmacSha1(byte[])
+ (1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_1, byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.getHmacSha256(byte[])
+ (1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_256, byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.getHmacSha384(byte[])
+ (1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_384, byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.getHmacSha512(byte[])
+ (1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_512, byte[]) |
+
org.apache.commons.codec.language.Soundex.getMaxLength()
+ This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
+ |
+
org.apache.commons.codec.digest.DigestUtils.getShaDigest()
+ (1.11) Use
+DigestUtils.getSha1Digest() |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmac(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5Hex(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5Hex(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacMd5Hex(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmacHex(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmac(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1Hex(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1Hex(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha1Hex(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmacHex(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmac(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256Hex(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256Hex(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha256Hex(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmacHex(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmac(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384Hex(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384Hex(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha384Hex(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmacHex(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmac(String) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512Hex(byte[], byte[])
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(byte[]) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512Hex(byte[], InputStream)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(InputStream) |
+
org.apache.commons.codec.digest.HmacUtils.hmacSha512Hex(String, String)
+ (1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmacHex(String) |
+
org.apache.commons.codec.binary.Base64.isArrayByteBase64(byte[])
+ 1.5 Use
+Base64.isBase64(byte[]) , will be removed in 2.0. |
+
org.apache.commons.codec.language.bm.Rule.Phoneme.join(Rule.Phoneme)
+ since 1.9
+ |
+
org.apache.commons.codec.language.Soundex.setMaxLength(int)
+ This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
+ |
+
org.apache.commons.codec.digest.DigestUtils.sha(byte[])
+ (1.11) Use
+DigestUtils.sha1(byte[]) |
+
org.apache.commons.codec.digest.DigestUtils.sha(InputStream)
+ (1.11) Use
+DigestUtils.sha1(InputStream) |
+
org.apache.commons.codec.digest.DigestUtils.sha(String)
+ (1.11) Use
+DigestUtils.sha1(String) |
+
org.apache.commons.codec.digest.DigestUtils.shaHex(byte[])
+ (1.11) Use
+DigestUtils.sha1Hex(byte[]) |
+
org.apache.commons.codec.digest.DigestUtils.shaHex(InputStream)
+ (1.11) Use
+DigestUtils.sha1Hex(InputStream) |
+
org.apache.commons.codec.digest.DigestUtils.shaHex(String)
+ (1.11) Use
+DigestUtils.sha1Hex(String) |
+
Constructor and Description | +
---|
org.apache.commons.codec.digest.DigestUtils()
+ since 1.11; only useful to preserve binary compatibility
+ |
+
org.apache.commons.codec.digest.HmacUtils()
+ since 1.11; only useful to preserve binary compatibility
+ |
+
org.apache.commons.codec.StringEncoderComparator()
+ Creating an instance without a
+StringEncoder leads to a NullPointerException . Will be
+ removed in 2.0. |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/help-doc.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/help-doc.html new file mode 100644 index 0000000..9e55f69 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/help-doc.html @@ -0,0 +1,230 @@ + + + + + + +The Overview page is the front page of this API document and provides a list of all packages with a summary for each. This page can also contain an overall description of the set of packages.
+Each package has a page that contains a list of its classes and interfaces, with a summary for each. This page can contain six categories:
+Each class, interface, nested class and nested interface has its own separate page. Each of these pages has three sections consisting of a class/interface description, summary tables, and detailed member descriptions:
+Each summary entry contains the first sentence from the detailed description for that item. The summary entries are alphabetical, while the detailed descriptions are in the order they appear in the source code. This preserves the logical groupings established by the programmer.
+Each annotation type has its own separate page with the following sections:
+Each enum has its own separate page with the following sections:
+Each documented package, class and interface has its own Use page. This page describes what packages, classes, methods, constructors and fields use any part of the given class or package. Given a class or interface A, its Use page includes subclasses of A, fields declared as A, methods that return A, and methods and constructors with parameters of type A. You can access this page by first going to the package, class or interface, then clicking on the "Use" link in the navigation bar.
+There is a Class Hierarchy page for all packages, plus a hierarchy for each package. Each hierarchy page contains a list of classes and a list of interfaces. The classes are organized by inheritance structure starting with java.lang.Object
. The interfaces do not inherit from java.lang.Object
.
The Deprecated API page lists all of the API that have been deprecated. A deprecated API is not recommended for use, generally due to improvements, and a replacement API is usually given. Deprecated APIs may be removed in future implementations.
+The Index contains an alphabetic list of all classes, interfaces, constructors, methods, and fields.
+These links take you to the next or previous class, interface, package, or related page.
+These links show and hide the HTML frames. All pages are available with or without frames.
+The All Classes link shows all classes and interfaces except non-static nested types.
+Each serializable or externalizable class has a description of its serialization fields and methods. This information is of interest to re-implementors, not to developers using the API. While there is no link in the navigation bar, you can get to this information by going to any serialized class and clicking "Serialized Form" in the "See also" section of the class description.
+The Constant Field Values page lists the static final fields and their values.
+Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index-all.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index-all.html new file mode 100644 index 0000000..178de24 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index-all.html @@ -0,0 +1,3063 @@ + + + + + + +Md5Crypt.apr1Crypt(byte[], String)
for details.Md5Crypt.apr1Crypt(byte[], String)
for details.Md5Crypt.apr1Crypt(String, String)
for details.Md5Crypt.apr1Crypt(String, String)
for details.lineLength
is rounded down to the nearest multiple of BaseNCodec.encodedBlockSize
+ If chunkSeparatorLength
is zero, then chunking is disabled.lineLength
is rounded down to the nearest multiple of BaseNCodec.encodedBlockSize
+ If chunkSeparatorLength
is zero, then chunking is disabled.Caverphone2
, will be removed in 2.0.CharSequence
that are null
safe.null
as its detail message.(cause==null ?
+ null : cause.toString())
(which typically contains the class and detail message of cause
).Charsets.UTF_8
CharEncoding.UTF_8
MessageDigest
tasks.MessageDigest
parameter.MessageDigest
parameter.null
as its detail message.(cause==null ?
+ null : cause.toString())
(which typically contains the class and detail message of cause
).size
bytestrue
if they represent equal sequences of characters.MessageDigest
for the given algorithm
.MessageDigest
for the given algorithm
or a default if there is a problem
+ getting the algorithm.URLCodec.getDefaultCharset()
, will be removed in 2.0.getInitializedMac(HmacAlgorithms.HMAC_MD5, byte[])
getInitializedMac(HmacAlgorithms.HMAC_SHA_1, byte[])
getInitializedMac(HmacAlgorithms.HMAC_SHA_256, byte[])
getInitializedMac(HmacAlgorithms.HMAC_SHA_384, byte[])
getInitializedMac(HmacAlgorithms.HMAC_SHA_512, byte[])
Mac
for the given algorithm
.Mac
for the given algorithm
.DigestUtils.getSha1Digest()
Hex.DEFAULT_CHARSET
HmacUtils
algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation.new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(byte[])
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmac(String)
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(byte[])
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmacHex(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmac(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmacHex(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmac(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmacHex(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmac(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmacHex(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmac(String)
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(byte[])
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(InputStream)
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmacHex(String)
Mac
tasks.Base64.isBase64(byte[])
, will be removed in 2.0.octet
is in the base 64 alphabet.String
values
+ are equal.String
values
+ are equal, optionally using the alternate value.octet
is in the Base32 alphabet.octet
is in the Base64 alphabet.octet
is in the current alphabet.Nysiis
encoder.args[0]
on the file in args[1]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.MessageDigest
algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation.String
by decoding the specified array of bytes using the given charset.String
by decoding the specified array of bytes using the ISO-8859-1 charset.String
by decoding the specified array of bytes using the US-ASCII charset.String
by decoding the specified array of bytes using the UTF-16 charset.String
by decoding the specified array of bytes using the UTF-16BE charset.String
by decoding the specified array of bytes using the UTF-16LE charset.String
by decoding the specified array of bytes using the UTF-8 charset.Nysiis
encoder with strict mode (original form),
+ i.e.Nysiis
encoder with the specified strict mode:
+
+
+ true
: encoded strings have a maximum length of 6
+ false
: encoded strings may have arbitrary length
+ MessageDigest
tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant.BaseNCodec.pad
. Will be removed in 2.0.Charsets.UTF_8
byte
from this input stream.len
bytes into the specified b
array starting at offset
+ from this InputStream.DigestUtils.sha1(byte[])
DigestUtils.sha1(InputStream)
DigestUtils.sha1(String)
byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.byte[]
.DigestUtils.sha1Hex(byte[])
DigestUtils.sha1Hex(InputStream)
DigestUtils.sha1Hex(String)
StringEncoder
.StringEncoder
leads to a NullPointerException
. Will be
+ removed in 2.0.MessageDigest
.MessageDigest
.MessageDigest
from a String (converted to bytes using UTF-8).Mac
with the value.Mac
with the value.Mac
with the value.byte
to this output stream.len
bytes from the specified b
array starting at offset
to this
+ output stream.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index.html new file mode 100644 index 0000000..e745c0d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/index.html @@ -0,0 +1,76 @@ + + + + + + +public interface BinaryDecoder +extends Decoder+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] source)
+Decodes a byte array and returns the results as a byte array.
+ |
+
byte[] decode(byte[] source) + throws DecoderException+
source
- A byte array which has been encoded with the appropriate encoderDecoderException
- A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/BinaryEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/BinaryEncoder.html new file mode 100644 index 0000000..0a6da5e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/BinaryEncoder.html @@ -0,0 +1,255 @@ + + + + + + +public interface BinaryEncoder +extends Encoder+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+encode(byte[] source)
+Encodes a byte array and returns the encoded data as a byte array.
+ |
+
byte[] encode(byte[] source) + throws EncoderException+
source
- Data to be encodedEncoderException
- thrown if the Encoder encounters a failure condition during the encoding process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/CharEncoding.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/CharEncoding.html new file mode 100644 index 0000000..c1afdc8 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/CharEncoding.html @@ -0,0 +1,441 @@ + + + + + + +public class CharEncoding +extends Object+
+ Every implementation of the Java platform is required to support the following character encodings. Consult the + release documentation for your implementation to see if any other encodings are supported. +
+ +US-ASCII
ISO-8859-1
UTF-8
UTF-16BE
UTF-16LE
UTF-16
+ This class is immutable and thread-safe. +
Modifier and Type | +Field and Description | +
---|---|
static String |
+ISO_8859_1
+ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
+ |
+
static String |
+US_ASCII
+Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
+ |
+
static String |
+UTF_16
+Sixteen-bit Unicode Transformation Format, the byte order is specified by a mandatory initial byte-order mark
+ (either order accepted on input, big-endian used on output)
+ |
+
static String |
+UTF_16BE
+Sixteen-bit Unicode Transformation Format, big-endian byte order.
+ |
+
static String |
+UTF_16LE
+Sixteen-bit Unicode Transformation Format, little-endian byte order.
+ |
+
static String |
+UTF_8
+Eight-bit Unicode Transformation Format.
+ |
+
Constructor and Description | +
---|
CharEncoding() |
+
public static final String ISO_8859_1+
+ Every implementation of the Java platform is required to support this character encoding.
public static final String US_ASCII+
+ Every implementation of the Java platform is required to support this character encoding.
public static final String UTF_16+
+ Every implementation of the Java platform is required to support this character encoding.
public static final String UTF_16BE+
+ Every implementation of the Java platform is required to support this character encoding.
public static final String UTF_16LE+
+ Every implementation of the Java platform is required to support this character encoding.
public static final String UTF_8+
+ Every implementation of the Java platform is required to support this character encoding.
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Charsets.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Charsets.html new file mode 100644 index 0000000..a46a3fa --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Charsets.html @@ -0,0 +1,526 @@ + + + + + + +public class Charsets +extends Object+
+ Every implementation of the Java platform is required to support the following character encodings. Consult the + release documentation for your implementation to see if any other encodings are supported. +
+ +US-ASCII
ISO-8859-1
UTF-8
UTF-16BE
UTF-16LE
UTF-16
+ This class is immutable and thread-safe. +
Modifier and Type | +Field and Description | +
---|---|
static Charset |
+ISO_8859_1
+ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
+ |
+
static Charset |
+US_ASCII
+Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
+ |
+
static Charset |
+UTF_16
+Sixteen-bit Unicode Transformation Format, the byte order is specified by a mandatory initial byte-order mark
+ (either order accepted on input, big-endian used on output)
+ |
+
static Charset |
+UTF_16BE
+Sixteen-bit Unicode Transformation Format, big-endian byte order.
+ |
+
static Charset |
+UTF_16LE
+Sixteen-bit Unicode Transformation Format, little-endian byte order.
+ |
+
static Charset |
+UTF_8
+Eight-bit Unicode Transformation Format.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Charset |
+toCharset(Charset charset)
+Returns the given Charset or the default Charset if the given Charset is null.
+ |
+
static Charset |
+toCharset(String charset)
+Returns a Charset for the named charset.
+ |
+
public static final Charset ISO_8859_1+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.ISO_8859_1
instead.
+
public static final Charset US_ASCII+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.US_ASCII
instead.
+
public static final Charset UTF_16+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.UTF_16
instead.
+
public static final Charset UTF_16BE+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.UTF_16BE
instead.
+
public static final Charset UTF_16LE+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.UTF_16LE
instead.
+
public static final Charset UTF_8+
+ Every implementation of the Java platform is required to support this character encoding. +
+
+ On Java 7 or later, use StandardCharsets.UTF_8
instead.
+
public static Charset toCharset(Charset charset)+
charset
- A charset or null.public static Charset toCharset(String charset)+
charset
- The name of the requested charset, may be null.UnsupportedCharsetException
- If the named charset is unavailableCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Decoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Decoder.html new file mode 100644 index 0000000..ee0b268 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Decoder.html @@ -0,0 +1,256 @@ + + + + + + +public interface Decoder
+
+ This is the sister interface of Encoder
. All Decoders implement this common generic interface.
+ Allows a user to pass a generic Object to any Decoder implementation in the codec package.
+
+ One of the two interfaces at the center of the codec package.
Modifier and Type | +Method and Description | +
---|---|
Object |
+decode(Object source)
+Decodes an "encoded" Object and returns a "decoded" Object.
+ |
+
Object decode(Object source) + throws DecoderException+
ClassCastException
occurs this decode method will throw a DecoderException.source
- the object to decodeDecoderException
- a decoder exception can be thrown for any number of reasons. Some good candidates are that the
+ parameter passed to this method is null, or a param cannot be cast to the appropriate type for a
+ specific decoder.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/DecoderException.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/DecoderException.html new file mode 100644 index 0000000..e8f52a9 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/DecoderException.html @@ -0,0 +1,348 @@ + + + + + + +public class DecoderException +extends Exception+
Decoder
+ encounters a decoding specific exception such as invalid data, or characters outside of the expected range.Constructor and Description | +
---|
DecoderException()
+Constructs a new exception with
+null as its detail message. |
+
DecoderException(String message)
+Constructs a new exception with the specified detail message.
+ |
+
DecoderException(String message,
+ Throwable cause)
+Constructs a new exception with the specified detail message and cause.
+ |
+
DecoderException(Throwable cause)
+Constructs a new exception with the specified cause and a detail message of
+(cause==null ?
+ null : cause.toString()) (which typically contains the class and detail message of cause ). |
+
addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
public DecoderException()+
null
as its detail message. The cause is not initialized, and may
+ subsequently be initialized by a call to Throwable.initCause(java.lang.Throwable)
.public DecoderException(String message)+
Throwable.initCause(java.lang.Throwable)
.message
- The detail message which is saved for later retrieval by the Throwable.getMessage()
method.public DecoderException(String message, + Throwable cause)+
+ Note that the detail message associated with cause
is not automatically incorporated into this
+ exception's detail message.
message
- The detail message which is saved for later retrieval by the Throwable.getMessage()
method.cause
- The cause which is saved for later retrieval by the Throwable.getCause()
method. A null
+ value is permitted, and indicates that the cause is nonexistent or unknown.public DecoderException(Throwable cause)+
(cause==null ?
+ null : cause.toString())
(which typically contains the class and detail message of cause
).
+ This constructor is useful for exceptions that are little more than wrappers for other throwables.cause
- The cause which is saved for later retrieval by the Throwable.getCause()
method. A null
+ value is permitted, and indicates that the cause is nonexistent or unknown.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Encoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Encoder.html new file mode 100644 index 0000000..e179527 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/Encoder.html @@ -0,0 +1,253 @@ + + + + + + +public interface Encoder
+
+ This is the sister interface of Decoder
. Every implementation of Encoder provides this
+ common generic interface which allows a user to pass a generic Object to any Encoder implementation
+ in the codec package.
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object source)
+Encodes an "Object" and returns the encoded content as an Object.
+ |
+
Object encode(Object source) + throws EncoderException+
byte[]
or String
s depending on the implementation used.source
- An object to encodeEncoderException
- An encoder exception is thrown if the encoder experiences a failure condition during the encoding
+ process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/EncoderException.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/EncoderException.html new file mode 100644 index 0000000..9bef00d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/EncoderException.html @@ -0,0 +1,351 @@ + + + + + + +public class EncoderException +extends Exception+
Encoder
encounters an encoding-specific exception such as invalid data, inability to calculate a checksum,
+ characters outside of the expected range.Constructor and Description | +
---|
EncoderException()
+Constructs a new exception with
+null as its detail message. |
+
EncoderException(String message)
+Constructs a new exception with the specified detail message.
+ |
+
EncoderException(String message,
+ Throwable cause)
+Constructs a new exception with the specified detail message and cause.
+ |
+
EncoderException(Throwable cause)
+Constructs a new exception with the specified cause and a detail message of
+(cause==null ?
+ null : cause.toString()) (which typically contains the class and detail message of cause ). |
+
addSuppressed, fillInStackTrace, getCause, getLocalizedMessage, getMessage, getStackTrace, getSuppressed, initCause, printStackTrace, printStackTrace, printStackTrace, setStackTrace, toString
public EncoderException()+
null
as its detail message. The cause is not initialized, and may
+ subsequently be initialized by a call to Throwable.initCause(java.lang.Throwable)
.public EncoderException(String message)+
Throwable.initCause(java.lang.Throwable)
.message
- a useful message relating to the encoder specific error.public EncoderException(String message, + Throwable cause)+
+ Note that the detail message associated with cause
is not automatically incorporated into this
+ exception's detail message.
+
message
- The detail message which is saved for later retrieval by the Throwable.getMessage()
method.cause
- The cause which is saved for later retrieval by the Throwable.getCause()
method. A null
+ value is permitted, and indicates that the cause is nonexistent or unknown.public EncoderException(Throwable cause)+
(cause==null ?
+ null : cause.toString())
(which typically contains the class and detail message of cause
).
+ This constructor is useful for exceptions that are little more than wrappers for other throwables.cause
- The cause which is saved for later retrieval by the Throwable.getCause()
method. A null
+ value is permitted, and indicates that the cause is nonexistent or unknown.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringDecoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringDecoder.html new file mode 100644 index 0000000..9815316 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringDecoder.html @@ -0,0 +1,255 @@ + + + + + + +Modifier and Type | +Method and Description | +
---|---|
String |
+decode(String source)
+Decodes a String and returns a String.
+ |
+
String decode(String source) + throws DecoderException+
source
- the String to decodeDecoderException
- thrown if there is an error condition during the decoding process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoder.html new file mode 100644 index 0000000..f39b80d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoder.html @@ -0,0 +1,255 @@ + + + + + + +public interface StringEncoder +extends Encoder+
Modifier and Type | +Method and Description | +
---|---|
String |
+encode(String source)
+Encodes a String and returns a String.
+ |
+
String encode(String source) + throws EncoderException+
source
- the String to encodeEncoderException
- thrown if there is an error condition during the encoding process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoderComparator.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoderComparator.html new file mode 100644 index 0000000..f9f34da --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/StringEncoderComparator.html @@ -0,0 +1,341 @@ + + + + + + +public class StringEncoderComparator +extends Object +implements Comparator+
StringEncoder
. This comparator is used to sort Strings by an encoding scheme such as
+ Soundex, Metaphone, etc. This class can come in handy if one needs to sort Strings by an encoded form of a name such
+ as Soundex.
+
+ This class is immutable and thread-safe.
Constructor and Description | +
---|
StringEncoderComparator()
+Deprecated.
+
+Creating an instance without a
+StringEncoder leads to a NullPointerException . Will be
+ removed in 2.0. |
+
StringEncoderComparator(StringEncoder stringEncoder)
+Constructs a new instance with the given algorithm.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
int |
+compare(Object o1,
+ Object o2)
+Compares two strings based not on the strings themselves, but on an encoding of the two strings using the
+ StringEncoder this Comparator was created with.
+ |
+
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
comparing, comparing, comparingDouble, comparingInt, comparingLong, equals, naturalOrder, nullsFirst, nullsLast, reversed, reverseOrder, thenComparing, thenComparing, thenComparing, thenComparingDouble, thenComparingInt, thenComparingLong
@Deprecated +public StringEncoderComparator()+
StringEncoder
leads to a NullPointerException
. Will be
+ removed in 2.0.public StringEncoderComparator(StringEncoder stringEncoder)+
stringEncoder
- the StringEncoder used for comparisons.public int compare(Object o1, + Object o2)+
EncoderException
is encountered, return 0
.compare
in interface Comparator
o1
- the object to compareo2
- the object to compare toComparable
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32.html new file mode 100644 index 0000000..a2919db --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32.html @@ -0,0 +1,545 @@ + + + + + + +public class Base32 +extends BaseNCodec+
+ The class can be parameterized in the following manner with various constructors: +
++ This class operates directly on byte streams, and not character streams. +
++ This class is thread-safe. +
lineLength, MASK_8BITS, MIME_CHUNK_SIZE, pad, PAD, PAD_DEFAULT, PEM_CHUNK_SIZE
Constructor and Description | +
---|
Base32()
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(boolean useHex)
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(boolean useHex,
+ byte pad)
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(byte pad)
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(int lineLength)
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(int lineLength,
+ byte[] lineSeparator)
+Creates a Base32 codec used for decoding and encoding.
+ |
+
Base32(int lineLength,
+ byte[] lineSeparator,
+ boolean useHex)
+Creates a Base32 / Base32 Hex codec used for decoding and encoding.
+ |
+
Base32(int lineLength,
+ byte[] lineSeparator,
+ boolean useHex,
+ byte pad)
+Creates a Base32 / Base32 Hex codec used for decoding and encoding.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
boolean |
+isInAlphabet(byte octet)
+Returns whether or not the
+octet is in the Base32 alphabet. |
+
containsAlphabetOrPad, decode, decode, decode, encode, encode, encode, encodeAsString, encodeToString, ensureBufferSize, getDefaultBufferSize, getEncodedLength, isInAlphabet, isInAlphabet, isWhiteSpace
public Base32()+
+ When encoding the line length is 0 (no chunking). +
public Base32(byte pad)+
+ When encoding the line length is 0 (no chunking). +
pad
- byte used as padding byte.public Base32(boolean useHex)+
+ When encoding the line length is 0 (no chunking). +
useHex
- if true
then use Base32 Hex alphabetpublic Base32(boolean useHex, + byte pad)+
+ When encoding the line length is 0 (no chunking). +
useHex
- if true
then use Base32 Hex alphabetpad
- byte used as padding byte.public Base32(int lineLength)+
+ When encoding the line length is given in the constructor, the line separator is CRLF. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.public Base32(int lineLength, + byte[] lineSeparator)+
+ When encoding the line length and line separator are given in the constructor. +
++ Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.lineSeparator
- Each line of encoded data will end with this sequence of bytes.IllegalArgumentException
- The provided lineSeparator included some Base32 characters. That's not going to work!public Base32(int lineLength, + byte[] lineSeparator, + boolean useHex)+
+ When encoding the line length and line separator are given in the constructor. +
++ Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.lineSeparator
- Each line of encoded data will end with this sequence of bytes.useHex
- if true
, then use Base32 Hex alphabet, otherwise use Base32 alphabetIllegalArgumentException
- The provided lineSeparator included some Base32 characters. That's not going to work! Or the
+ lineLength > 0 and lineSeparator is null.public Base32(int lineLength, + byte[] lineSeparator, + boolean useHex, + byte pad)+
+ When encoding the line length and line separator are given in the constructor. +
++ Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.lineSeparator
- Each line of encoded data will end with this sequence of bytes.useHex
- if true
, then use Base32 Hex alphabet, otherwise use Base32 alphabetpad
- byte used as padding byte.IllegalArgumentException
- The provided lineSeparator included some Base32 characters. That's not going to work! Or the
+ lineLength > 0 and lineSeparator is null.public boolean isInAlphabet(byte octet)+
octet
is in the Base32 alphabet.isInAlphabet
in class BaseNCodec
octet
- The value to testtrue
 if the value is defined in the Base32 alphabet false
otherwise.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32InputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32InputStream.html new file mode 100644 index 0000000..d45d4e1 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32InputStream.html @@ -0,0 +1,369 @@ + + + + + + +public class Base32InputStream +extends BaseNCodecInputStream+
+ The default behaviour of the Base32InputStream is to DECODE, whereas the default behaviour of the Base32OutputStream + is to ENCODE, but this behaviour can be overridden by using a different constructor. +
++ Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode + character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). +
in
Constructor and Description | +
---|
Base32InputStream(InputStream in)
+Creates a Base32InputStream such that all data read is Base32-decoded from the original provided InputStream.
+ |
+
Base32InputStream(InputStream in,
+ boolean doEncode)
+Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
+ provided InputStream.
+ |
+
Base32InputStream(InputStream in,
+ boolean doEncode,
+ int lineLength,
+ byte[] lineSeparator)
+Creates a Base32InputStream such that all data read is either Base32-encoded or Base32-decoded from the original
+ provided InputStream.
+ |
+
available, mark, markSupported, read, read, reset, skip
close, read
public Base32InputStream(InputStream in)+
in
- InputStream to wrap.public Base32InputStream(InputStream in, + boolean doEncode)+
in
- InputStream to wrap.doEncode
- true if we should encode all data read from us, false if we should decode.public Base32InputStream(InputStream in, + boolean doEncode, + int lineLength, + byte[] lineSeparator)+
in
- InputStream to wrap.doEncode
- true if we should encode all data read from us, false if we should decode.lineLength
- If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
+ nearest multiple of 8). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
+ is false, lineLength is ignored.lineSeparator
- If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
+ If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32OutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32OutputStream.html new file mode 100644 index 0000000..30b4ca9 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base32OutputStream.html @@ -0,0 +1,373 @@ + + + + + + +public class Base32OutputStream +extends BaseNCodecOutputStream+
+ The default behaviour of the Base32OutputStream is to ENCODE, whereas the default behaviour of the Base32InputStream + is to DECODE. But this behaviour can be overridden by using a different constructor. +
++ Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode + character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). +
++ Note: It is mandatory to close the stream after the last byte has been written to it, otherwise the + final padding will be omitted and the resulting data will be incomplete/inconsistent. +
out
Constructor and Description | +
---|
Base32OutputStream(OutputStream out)
+Creates a Base32OutputStream such that all data written is Base32-encoded to the original provided OutputStream.
+ |
+
Base32OutputStream(OutputStream out,
+ boolean doEncode)
+Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
+ original provided OutputStream.
+ |
+
Base32OutputStream(OutputStream out,
+ boolean doEncode,
+ int lineLength,
+ byte[] lineSeparator)
+Creates a Base32OutputStream such that all data written is either Base32-encoded or Base32-decoded to the
+ original provided OutputStream.
+ |
+
public Base32OutputStream(OutputStream out)+
out
- OutputStream to wrap.public Base32OutputStream(OutputStream out, + boolean doEncode)+
out
- OutputStream to wrap.doEncode
- true if we should encode all data written to us, false if we should decode.public Base32OutputStream(OutputStream out, + boolean doEncode, + int lineLength, + byte[] lineSeparator)+
out
- OutputStream to wrap.doEncode
- true if we should encode all data written to us, false if we should decode.lineLength
- If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
+ nearest multiple of 8). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
+ is false, lineLength is ignored.lineSeparator
- If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
+ If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64.html new file mode 100644 index 0000000..b88a1f3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64.html @@ -0,0 +1,948 @@ + + + + + + +public class Base64 +extends BaseNCodec+
+ This class implements section 6.8. Base64 Content-Transfer-Encoding from RFC 2045 Multipurpose + Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies by Freed and Borenstein. +
++ The class can be parameterized in the following manner with various constructors: +
++ The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes. +
++ Since this class operates directly on byte streams, and not character streams, it is hard-coded to only + encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, + UTF-8, etc). +
++ This class is thread-safe. +
lineLength, MASK_8BITS, MIME_CHUNK_SIZE, pad, PAD, PAD_DEFAULT, PEM_CHUNK_SIZE
Constructor and Description | +
---|
Base64()
+Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
+ |
+
Base64(boolean urlSafe)
+Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
+ |
+
Base64(int lineLength)
+Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
+ |
+
Base64(int lineLength,
+ byte[] lineSeparator)
+Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode.
+ |
+
Base64(int lineLength,
+ byte[] lineSeparator,
+ boolean urlSafe)
+Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static byte[] |
+decodeBase64(byte[] base64Data)
+Decodes Base64 data into octets.
+ |
+
static byte[] |
+decodeBase64(String base64String)
+Decodes a Base64 String into octets.
+ |
+
static BigInteger |
+decodeInteger(byte[] pArray)
+Decodes a base64-encoded integer according to crypto standards such as W3C's XML-Signature.
+ |
+
static byte[] |
+encodeBase64(byte[] binaryData)
+Encodes binary data using the base64 algorithm but does not chunk the output.
+ |
+
static byte[] |
+encodeBase64(byte[] binaryData,
+ boolean isChunked)
+Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
+ |
+
static byte[] |
+encodeBase64(byte[] binaryData,
+ boolean isChunked,
+ boolean urlSafe)
+Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
+ |
+
static byte[] |
+encodeBase64(byte[] binaryData,
+ boolean isChunked,
+ boolean urlSafe,
+ int maxResultSize)
+Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks.
+ |
+
static byte[] |
+encodeBase64Chunked(byte[] binaryData)
+Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks
+ |
+
static String |
+encodeBase64String(byte[] binaryData)
+Encodes binary data using the base64 algorithm but does not chunk the output.
+ |
+
static byte[] |
+encodeBase64URLSafe(byte[] binaryData)
+Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output.
+ |
+
static String |
+encodeBase64URLSafeString(byte[] binaryData)
+Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output.
+ |
+
static byte[] |
+encodeInteger(BigInteger bigInt)
+Encodes to a base64-encoded integer according to crypto standards such as W3C's XML-Signature.
+ |
+
static boolean |
+isArrayByteBase64(byte[] arrayOctet)
+Deprecated.
+
+1.5 Use
+isBase64(byte[]) , will be removed in 2.0. |
+
static boolean |
+isBase64(byte octet)
+Returns whether or not the
+octet is in the base 64 alphabet. |
+
static boolean |
+isBase64(byte[] arrayOctet)
+Tests a given byte array to see if it contains only valid characters within the Base64 alphabet.
+ |
+
static boolean |
+isBase64(String base64)
+Tests a given String to see if it contains only valid characters within the Base64 alphabet.
+ |
+
protected boolean |
+isInAlphabet(byte octet)
+Returns whether or not the
+octet is in the Base64 alphabet. |
+
boolean |
+isUrlSafe()
+Returns our current encode mode.
+ |
+
containsAlphabetOrPad, decode, decode, decode, encode, encode, encode, encodeAsString, encodeToString, ensureBufferSize, getDefaultBufferSize, getEncodedLength, isInAlphabet, isInAlphabet, isWhiteSpace
public Base64()+
+ When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE. +
+ ++ When decoding all variants are supported. +
public Base64(boolean urlSafe)+
+ When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. +
+ ++ When decoding all variants are supported. +
urlSafe
- if true
, URL-safe encoding is used. In most cases this should be set to
+ false
.public Base64(int lineLength)+
+ When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is + STANDARD_ENCODE_TABLE. +
++ Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +
++ When decoding all variants are supported. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.public Base64(int lineLength, + byte[] lineSeparator)+
+ When encoding the line length and line separator are given in the constructor, and the encoding table is + STANDARD_ENCODE_TABLE. +
++ Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +
++ When decoding all variants are supported. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.lineSeparator
- Each line of encoded data will end with this sequence of bytes.IllegalArgumentException
- Thrown when the provided lineSeparator included some base64 characters.public Base64(int lineLength, + byte[] lineSeparator, + boolean urlSafe)+
+ When encoding the line length and line separator are given in the constructor, and the encoding table is + STANDARD_ENCODE_TABLE. +
++ Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +
++ When decoding all variants are supported. +
lineLength
- Each line of encoded data will be at most of the given length (rounded down to nearest multiple of
+ 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when
+ decoding.lineSeparator
- Each line of encoded data will end with this sequence of bytes.urlSafe
- Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode
+ operations. Decoding seamlessly handles both modes.
+ Note: no padding is added when using the URL-safe alphabet.IllegalArgumentException
- The provided lineSeparator included some base64 characters. That's not going to work!public boolean isUrlSafe()+
@Deprecated +public static boolean isArrayByteBase64(byte[] arrayOctet)+
isBase64(byte[])
, will be removed in 2.0.arrayOctet
- byte array to testtrue
if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
+ false
, otherwisepublic static boolean isBase64(byte octet)+
octet
is in the base 64 alphabet.octet
- The value to testtrue
 if the value is defined in the base 64 alphabet, false
otherwise.public static boolean isBase64(String base64)+
base64
- String to testtrue
if all characters in the String are valid characters in the Base64 alphabet or if
+ the String is empty; false
, otherwisepublic static boolean isBase64(byte[] arrayOctet)+
arrayOctet
- byte array to testtrue
if all bytes are valid characters in the Base64 alphabet or if the byte array is empty;
+ false
, otherwisepublic static byte[] encodeBase64(byte[] binaryData)+
binaryData
- binary data to encodepublic static String encodeBase64String(byte[] binaryData)+
binaryData
- binary data to encodepublic static byte[] encodeBase64URLSafe(byte[] binaryData)+
binaryData
- binary data to encodepublic static String encodeBase64URLSafeString(byte[] binaryData)+
binaryData
- binary data to encodepublic static byte[] encodeBase64Chunked(byte[] binaryData)+
binaryData
- binary data to encodepublic static byte[] encodeBase64(byte[] binaryData, + boolean isChunked)+
binaryData
- Array containing binary data to encode.isChunked
- if true
this encoder will chunk the base64 output into 76 character blocksIllegalArgumentException
- Thrown when the input array needs an output array bigger than Integer.MAX_VALUE
public static byte[] encodeBase64(byte[] binaryData, + boolean isChunked, + boolean urlSafe)+
binaryData
- Array containing binary data to encode.isChunked
- if true
this encoder will chunk the base64 output into 76 character blocksurlSafe
- if true
this encoder will emit - and _ instead of the usual + and / characters.
+ Note: no padding is added when encoding using the URL-safe alphabet.IllegalArgumentException
- Thrown when the input array needs an output array bigger than Integer.MAX_VALUE
public static byte[] encodeBase64(byte[] binaryData, + boolean isChunked, + boolean urlSafe, + int maxResultSize)+
binaryData
- Array containing binary data to encode.isChunked
- if true
this encoder will chunk the base64 output into 76 character blocksurlSafe
- if true
this encoder will emit - and _ instead of the usual + and / characters.
+ Note: no padding is added when encoding using the URL-safe alphabet.maxResultSize
- The maximum result size to accept.IllegalArgumentException
- Thrown when the input array needs an output array bigger than maxResultSizepublic static byte[] decodeBase64(String base64String)+
+ Note: this method seamlessly handles data encoded in URL-safe or normal mode. +
base64String
- String containing Base64 datapublic static byte[] decodeBase64(byte[] base64Data)+
+ Note: this method seamlessly handles data encoded in URL-safe or normal mode. +
base64Data
- Byte array containing Base64 datapublic static BigInteger decodeInteger(byte[] pArray)+
pArray
- a byte array containing base64 character datapublic static byte[] encodeInteger(BigInteger bigInt)+
bigInt
- a BigIntegerNullPointerException
- if null is passed inprotected boolean isInAlphabet(byte octet)+
octet
is in the Base64 alphabet.isInAlphabet
in class BaseNCodec
octet
- The value to testtrue
 if the value is defined in the Base64 alphabet false
otherwise.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64InputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64InputStream.html new file mode 100644 index 0000000..aa13a06 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64InputStream.html @@ -0,0 +1,373 @@ + + + + + + +public class Base64InputStream +extends BaseNCodecInputStream+
+ The default behaviour of the Base64InputStream is to DECODE, whereas the default behaviour of the Base64OutputStream + is to ENCODE, but this behaviour can be overridden by using a different constructor. +
++ This class implements section 6.8. Base64 Content-Transfer-Encoding from RFC 2045 Multipurpose + Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies by Freed and Borenstein. +
++ Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode + character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). +
in
Constructor and Description | +
---|
Base64InputStream(InputStream in)
+Creates a Base64InputStream such that all data read is Base64-decoded from the original provided InputStream.
+ |
+
Base64InputStream(InputStream in,
+ boolean doEncode)
+Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
+ provided InputStream.
+ |
+
Base64InputStream(InputStream in,
+ boolean doEncode,
+ int lineLength,
+ byte[] lineSeparator)
+Creates a Base64InputStream such that all data read is either Base64-encoded or Base64-decoded from the original
+ provided InputStream.
+ |
+
available, mark, markSupported, read, read, reset, skip
close, read
public Base64InputStream(InputStream in)+
in
- InputStream to wrap.public Base64InputStream(InputStream in, + boolean doEncode)+
in
- InputStream to wrap.doEncode
- true if we should encode all data read from us, false if we should decode.public Base64InputStream(InputStream in, + boolean doEncode, + int lineLength, + byte[] lineSeparator)+
in
- InputStream to wrap.doEncode
- true if we should encode all data read from us, false if we should decode.lineLength
- If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
+ nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
+ is false, lineLength is ignored.lineSeparator
- If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
+ If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64OutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64OutputStream.html new file mode 100644 index 0000000..c1e222d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Base64OutputStream.html @@ -0,0 +1,377 @@ + + + + + + +public class Base64OutputStream +extends BaseNCodecOutputStream+
+ The default behaviour of the Base64OutputStream is to ENCODE, whereas the default behaviour of the Base64InputStream + is to DECODE. But this behaviour can be overridden by using a different constructor. +
++ This class implements section 6.8. Base64 Content-Transfer-Encoding from RFC 2045 Multipurpose + Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies by Freed and Borenstein. +
++ Since this class operates directly on byte streams, and not character streams, it is hard-coded to only encode/decode + character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc). +
++ Note: It is mandatory to close the stream after the last byte has been written to it, otherwise the + final padding will be omitted and the resulting data will be incomplete/inconsistent. +
out
Constructor and Description | +
---|
Base64OutputStream(OutputStream out)
+Creates a Base64OutputStream such that all data written is Base64-encoded to the original provided OutputStream.
+ |
+
Base64OutputStream(OutputStream out,
+ boolean doEncode)
+Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
+ original provided OutputStream.
+ |
+
Base64OutputStream(OutputStream out,
+ boolean doEncode,
+ int lineLength,
+ byte[] lineSeparator)
+Creates a Base64OutputStream such that all data written is either Base64-encoded or Base64-decoded to the
+ original provided OutputStream.
+ |
+
public Base64OutputStream(OutputStream out)+
out
- OutputStream to wrap.public Base64OutputStream(OutputStream out, + boolean doEncode)+
out
- OutputStream to wrap.doEncode
- true if we should encode all data written to us, false if we should decode.public Base64OutputStream(OutputStream out, + boolean doEncode, + int lineLength, + byte[] lineSeparator)+
out
- OutputStream to wrap.doEncode
- true if we should encode all data written to us, false if we should decode.lineLength
- If doEncode is true, each line of encoded data will contain lineLength characters (rounded down to
+ nearest multiple of 4). If lineLength <= 0, the encoded data is not divided into lines. If doEncode
+ is false, lineLength is ignored.lineSeparator
- If doEncode is true, each line of encoded data will be terminated with this byte sequence (e.g. \r\n).
+ If lineLength <= 0, the lineSeparator is not used. If doEncode is false lineSeparator is ignored.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodec.html new file mode 100644 index 0000000..28643a2 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodec.html @@ -0,0 +1,899 @@ + + + + + + +public abstract class BaseNCodec +extends Object +implements BinaryEncoder, BinaryDecoder+
+ This class is thread-safe. +
Modifier and Type | +Field and Description | +
---|---|
protected int |
+lineLength
+Chunksize for encoding.
+ |
+
protected static int |
+MASK_8BITS
+Mask used to extract 8 bits, used in decoding bytes
+ |
+
static int |
+MIME_CHUNK_SIZE
+MIME chunk size per RFC 2045 section 6.8.
+ |
+
protected byte |
+pad |
+
protected byte |
+PAD
+Deprecated.
+
+Use
+pad . Will be removed in 2.0. |
+
protected static byte |
+PAD_DEFAULT
+Byte used to pad output.
+ |
+
static int |
+PEM_CHUNK_SIZE
+PEM chunk size per RFC 1421 section 4.3.2.4.
+ |
+
Modifier | +Constructor and Description | +
---|---|
protected |
+BaseNCodec(int unencodedBlockSize,
+ int encodedBlockSize,
+ int lineLength,
+ int chunkSeparatorLength)
+Note
+lineLength is rounded down to the nearest multiple of encodedBlockSize
+ If chunkSeparatorLength is zero, then chunking is disabled. |
+
protected |
+BaseNCodec(int unencodedBlockSize,
+ int encodedBlockSize,
+ int lineLength,
+ int chunkSeparatorLength,
+ byte pad)
+Note
+lineLength is rounded down to the nearest multiple of encodedBlockSize
+ If chunkSeparatorLength is zero, then chunking is disabled. |
+
Modifier and Type | +Method and Description | +
---|---|
protected boolean |
+containsAlphabetOrPad(byte[] arrayOctet)
+Tests a given byte array to see if it contains any characters within the alphabet or PAD.
+ |
+
byte[] |
+decode(byte[] pArray)
+Decodes a byte[] containing characters in the Base-N alphabet.
+ |
+
Object |
+decode(Object obj)
+Decodes an Object using the Base-N algorithm.
+ |
+
byte[] |
+decode(String pArray)
+Decodes a String containing characters in the Base-N alphabet.
+ |
+
byte[] |
+encode(byte[] pArray)
+Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
+ |
+
byte[] |
+encode(byte[] pArray,
+ int offset,
+ int length)
+Encodes a byte[] containing binary data, into a byte[] containing
+ characters in the alphabet.
+ |
+
Object |
+encode(Object obj)
+Encodes an Object using the Base-N algorithm.
+ |
+
String |
+encodeAsString(byte[] pArray)
+Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
+ |
+
String |
+encodeToString(byte[] pArray)
+Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
+ |
+
protected byte[] |
+ensureBufferSize(int size,
+ org.apache.commons.codec.binary.BaseNCodec.Context context)
+Ensure that the buffer has room for
+size bytes |
+
protected int |
+getDefaultBufferSize()
+Get the default buffer size.
+ |
+
long |
+getEncodedLength(byte[] pArray)
+Calculates the amount of space needed to encode the supplied array.
+ |
+
protected abstract boolean |
+isInAlphabet(byte value)
+Returns whether or not the
+octet is in the current alphabet. |
+
boolean |
+isInAlphabet(byte[] arrayOctet,
+ boolean allowWSPad)
+Tests a given byte array to see if it contains only valid characters within the alphabet.
+ |
+
boolean |
+isInAlphabet(String basen)
+Tests a given String to see if it contains only valid characters within the alphabet.
+ |
+
protected static boolean |
+isWhiteSpace(byte byteToCheck)
+Checks if a byte value is whitespace or not.
+ |
+
public static final int MIME_CHUNK_SIZE+
+ The 76 character limit does not count the trailing CRLF, but counts all other characters, including any + equal signs. +
public static final int PEM_CHUNK_SIZE+
+ The 64 character limit does not count the trailing CRLF, but counts all other characters, including any + equal signs. +
protected static final int MASK_8BITS+
protected static final byte PAD_DEFAULT+
@Deprecated +protected final byte PAD+
pad
. Will be removed in 2.0.protected final byte pad+
protected final int lineLength+
protected BaseNCodec(int unencodedBlockSize, + int encodedBlockSize, + int lineLength, + int chunkSeparatorLength)+
lineLength
is rounded down to the nearest multiple of encodedBlockSize
+ If chunkSeparatorLength
is zero, then chunking is disabled.unencodedBlockSize
- the size of an unencoded block (e.g. Base64 = 3)encodedBlockSize
- the size of an encoded block (e.g. Base64 = 4)lineLength
- if > 0, use chunking with a length lineLength
chunkSeparatorLength
- the chunk separator length, if relevantprotected BaseNCodec(int unencodedBlockSize, + int encodedBlockSize, + int lineLength, + int chunkSeparatorLength, + byte pad)+
lineLength
is rounded down to the nearest multiple of encodedBlockSize
+ If chunkSeparatorLength
is zero, then chunking is disabled.unencodedBlockSize
- the size of an unencoded block (e.g. Base64 = 3)encodedBlockSize
- the size of an encoded block (e.g. Base64 = 4)lineLength
- if > 0, use chunking with a length lineLength
chunkSeparatorLength
- the chunk separator length, if relevantpad
- byte used as padding byte.protected int getDefaultBufferSize()+
DEFAULT_BUFFER_SIZE
protected byte[] ensureBufferSize(int size, + org.apache.commons.codec.binary.BaseNCodec.Context context)+
size
bytessize
- minimum spare space requiredcontext
- the context to be usedprotected static boolean isWhiteSpace(byte byteToCheck)+
byteToCheck
- the byte to checkpublic Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not of type byte[]public String encodeToString(byte[] pArray)+
pArray
- a byte array containing binary datapublic String encodeAsString(byte[] pArray)+
pArray
- a byte array containing binary dataencodeToString(byte[])
; it was merged during refactoring.public Object decode(Object obj) + throws DecoderException+
decode
in interface Decoder
obj
- Object to decodeDecoderException
- if the parameter supplied is not of type byte[]public byte[] decode(String pArray)+
pArray
- A String containing Base-N character datapublic byte[] decode(byte[] pArray)+
decode
in interface BinaryDecoder
pArray
- A byte array containing Base-N character datapublic byte[] encode(byte[] pArray)+
encode
in interface BinaryEncoder
pArray
- a byte array containing binary datapublic byte[] encode(byte[] pArray, + int offset, + int length)+
pArray
- a byte array containing binary dataoffset
- initial offset of the subarray.length
- length of the subarray.protected abstract boolean isInAlphabet(byte value)+
octet
is in the current alphabet.
+ Does not allow whitespace or pad.value
- The value to testtrue
if the value is defined in the current alphabet, false
otherwise.public boolean isInAlphabet(byte[] arrayOctet, + boolean allowWSPad)+
arrayOctet
- byte array to testallowWSPad
- if true
, then whitespace and PAD are also allowedtrue
if all bytes are valid characters in the alphabet or if the byte array is empty;
+ false
, otherwisepublic boolean isInAlphabet(String basen)+
basen
- String to testtrue
if all characters in the String are valid characters in the alphabet or if
+ the String is empty; false
, otherwiseisInAlphabet(byte[], boolean)
protected boolean containsAlphabetOrPad(byte[] arrayOctet)+
arrayOctet
- byte array to testtrue
if any byte is a valid character in the alphabet or PAD; false
otherwisepublic long getEncodedLength(byte[] pArray)+
pArray
- byte[] array which will later be encodedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecInputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecInputStream.html new file mode 100644 index 0000000..ecbed9d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecInputStream.html @@ -0,0 +1,493 @@ + + + + + + +public class BaseNCodecInputStream +extends FilterInputStream+
in
Modifier | +Constructor and Description | +
---|---|
protected |
+BaseNCodecInputStream(InputStream in,
+ BaseNCodec baseNCodec,
+ boolean doEncode) |
+
Modifier and Type | +Method and Description | +
---|---|
int |
+available() |
+
void |
+mark(int readLimit)
+Marks the current position in this input stream.
+ |
+
boolean |
+markSupported() |
+
int |
+read()
+Reads one
+byte from this input stream. |
+
int |
+read(byte[] b,
+ int offset,
+ int len)
+Attempts to read
+len bytes into the specified b array starting at offset
+ from this InputStream. |
+
void |
+reset()
+Repositions this stream to the position at the time the mark method was last called on this input stream.
+ |
+
long |
+skip(long n) |
+
close, read
protected BaseNCodecInputStream(InputStream in, + BaseNCodec baseNCodec, + boolean doEncode)+
public int available() + throws IOException+
available
in class FilterInputStream
0
if the InputStream
has reached EOF
,
+ 1
otherwiseIOException
public void mark(int readLimit)+
The mark(int)
method of BaseNCodecInputStream
does nothing.
mark
in class FilterInputStream
readLimit
- the maximum limit of bytes that can be read before the mark position becomes invalid.public boolean markSupported()+
markSupported
in class FilterInputStream
false
public int read() + throws IOException+
byte
from this input stream.read
in class FilterInputStream
IOException
- if an I/O error occurs.public int read(byte[] b, + int offset, + int len) + throws IOException+
len
bytes into the specified b
array starting at offset
+ from this InputStream.read
in class FilterInputStream
b
- destination byte arrayoffset
- where to start writing the byteslen
- maximum number of bytes to readIOException
- if an I/O error occurs.NullPointerException
- if the byte array parameter is nullIndexOutOfBoundsException
- if offset, len or buffer size are invalidpublic void reset() + throws IOException+
+ The reset()
method of BaseNCodecInputStream
does nothing except throw an IOException
.
reset
in class FilterInputStream
IOException
- if this method is invokedpublic long skip(long n) + throws IOException+
skip
in class FilterInputStream
IllegalArgumentException
- if the provided skip length is negativeIOException
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecOutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecOutputStream.html new file mode 100644 index 0000000..524ecb9 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BaseNCodecOutputStream.html @@ -0,0 +1,457 @@ + + + + + + +public class BaseNCodecOutputStream +extends FilterOutputStream+
+ To write the EOF marker without closing the stream, call eof()
or use an Apache Commons IO CloseShieldOutputStream.
+
out
Constructor and Description | +
---|
BaseNCodecOutputStream(OutputStream out,
+ BaseNCodec basedCodec,
+ boolean doEncode) |
+
Modifier and Type | +Method and Description | +
---|---|
void |
+close()
+Closes this output stream and releases any system resources associated with the stream.
+ |
+
void |
+eof()
+Writes EOF.
+ |
+
void |
+flush()
+Flushes this output stream and forces any buffered output bytes to be written out to the stream.
+ |
+
void |
+write(byte[] b,
+ int offset,
+ int len)
+Writes
+len bytes from the specified b array starting at offset to this
+ output stream. |
+
void |
+write(int i)
+Writes the specified
+byte to this output stream. |
+
write
public BaseNCodecOutputStream(OutputStream out, + BaseNCodec basedCodec, + boolean doEncode)+
public void write(int i) + throws IOException+
byte
to this output stream.write
in class FilterOutputStream
i
- source byteIOException
- if an I/O error occurs.public void write(byte[] b, + int offset, + int len) + throws IOException+
len
bytes from the specified b
array starting at offset
to this
+ output stream.write
in class FilterOutputStream
b
- source byte arrayoffset
- where to start reading the byteslen
- maximum number of bytes to writeIOException
- if an I/O error occurs.NullPointerException
- if the byte array parameter is nullIndexOutOfBoundsException
- if offset, len or buffer size are invalidpublic void flush() + throws IOException+
flush
in interface Flushable
flush
in class FilterOutputStream
IOException
- if an I/O error occurs.public void close() + throws IOException+
+ To write the EOF marker without closing the stream, call eof()
or use an
+ Apache Commons IO CloseShieldOutputStream.
+
close
in interface Closeable
close
in interface AutoCloseable
close
in class FilterOutputStream
IOException
- if an I/O error occurs.public void eof() + throws IOException+
IOException
- if an I/O error occurs.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BinaryCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BinaryCodec.html new file mode 100644 index 0000000..1e91384 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/BinaryCodec.html @@ -0,0 +1,528 @@ + + + + + + +public class BinaryCodec +extends Object +implements BinaryDecoder, BinaryEncoder+
This class is immutable and thread-safe.
+ + TODO: may want to add more bit vector functions like and/or/xor/nand + TODO: also might be good to generate boolean[] from byte[] et cetera.Constructor and Description | +
---|
BinaryCodec() |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] ascii)
+Decodes a byte array where each byte represents an ASCII '0' or '1'.
+ |
+
Object |
+decode(Object ascii)
+Decodes a byte array where each byte represents an ASCII '0' or '1'.
+ |
+
byte[] |
+encode(byte[] raw)
+Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
+ |
+
Object |
+encode(Object raw)
+Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
+ |
+
static byte[] |
+fromAscii(byte[] ascii)
+Decodes a byte array where each byte represents an ASCII '0' or '1'.
+ |
+
static byte[] |
+fromAscii(char[] ascii)
+Decodes a char array where each char represents an ASCII '0' or '1'.
+ |
+
static byte[] |
+toAsciiBytes(byte[] raw)
+Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
+ char.
+ |
+
static char[] |
+toAsciiChars(byte[] raw)
+Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
+ |
+
static String |
+toAsciiString(byte[] raw)
+Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
+ |
+
byte[] |
+toByteArray(String ascii)
+Decodes a String where each char of the String represents an ASCII '0' or '1'.
+ |
+
public byte[] encode(byte[] raw)+
encode
in interface BinaryEncoder
raw
- the raw binary data to convertBinaryEncoder.encode(byte[])
public Object encode(Object raw) + throws EncoderException+
encode
in interface Encoder
raw
- the raw binary data to convertEncoderException
- if the argument is not a byte[]Encoder.encode(Object)
public Object decode(Object ascii) + throws DecoderException+
decode
in interface Decoder
ascii
- each byte represents an ASCII '0' or '1'DecoderException
- if argument is not a byte[], char[] or StringDecoder.decode(Object)
public byte[] decode(byte[] ascii)+
decode
in interface BinaryDecoder
ascii
- each byte represents an ASCII '0' or '1'Decoder.decode(Object)
public byte[] toByteArray(String ascii)+
ascii
- String of '0' and '1' charactersDecoder.decode(Object)
public static byte[] fromAscii(char[] ascii)+
ascii
- each char represents an ASCII '0' or '1'public static byte[] fromAscii(byte[] ascii)+
ascii
- each byte represents an ASCII '0' or '1'public static byte[] toAsciiBytes(byte[] raw)+
raw
- the raw binary data to convertBinaryEncoder.encode(byte[])
public static char[] toAsciiChars(byte[] raw)+
raw
- the raw binary data to convertBinaryEncoder.encode(byte[])
public static String toAsciiString(byte[] raw)+
raw
- the raw binary data to convertBinaryEncoder.encode(byte[])
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/CharSequenceUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/CharSequenceUtils.html new file mode 100644 index 0000000..5ebd663 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/CharSequenceUtils.html @@ -0,0 +1,250 @@ + + + + + + +public class CharSequenceUtils +extends Object+
+ Operations on CharSequence
that are null
safe.
+
+ Copied from Apache Commons Lang r1586295 on April 10, 2014 (day of 3.3.2 release). +
CharSequence
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Hex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Hex.html new file mode 100644 index 0000000..53890f3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/Hex.html @@ -0,0 +1,1009 @@ + + + + + + +public class Hex +extends Object +implements BinaryEncoder, BinaryDecoder+
DEFAULT_CHARSET_NAME
+
+ This class is thread-safe.Modifier and Type | +Field and Description | +
---|---|
static Charset |
+DEFAULT_CHARSET
+Default charset is
+Charsets.UTF_8 |
+
static String |
+DEFAULT_CHARSET_NAME
+Default charset name is
+CharEncoding.UTF_8 |
+
Constructor and Description | +
---|
Hex()
+Creates a new codec with the default charset name
+DEFAULT_CHARSET |
+
Hex(Charset charset)
+Creates a new codec with the given Charset.
+ |
+
Hex(String charsetName)
+Creates a new codec with the given charset name.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] array)
+Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values.
+ |
+
byte[] |
+decode(ByteBuffer buffer)
+Converts a buffer of character bytes representing hexadecimal values into an array of bytes of those same values.
+ |
+
Object |
+decode(Object object)
+Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those
+ same values.
+ |
+
static byte[] |
+decodeHex(char[] data)
+Converts an array of characters representing hexadecimal values into an array of bytes of those same values.
+ |
+
static byte[] |
+decodeHex(String data)
+Converts a String representing hexadecimal values into an array of bytes of those same values.
+ |
+
byte[] |
+encode(byte[] array)
+Converts an array of bytes into an array of bytes for the characters representing the hexadecimal values of each
+ byte in order.
+ |
+
byte[] |
+encode(ByteBuffer array)
+Converts byte buffer into an array of bytes for the characters representing the hexadecimal values of each
+ byte in order.
+ |
+
Object |
+encode(Object object)
+Converts a String or an array of bytes into an array of characters representing the hexadecimal values of each
+ byte in order.
+ |
+
static char[] |
+encodeHex(byte[] data)
+Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
static char[] |
+encodeHex(byte[] data,
+ boolean toLowerCase)
+Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
protected static char[] |
+encodeHex(byte[] data,
+ char[] toDigits)
+Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
static char[] |
+encodeHex(ByteBuffer data)
+Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
static char[] |
+encodeHex(ByteBuffer data,
+ boolean toLowerCase)
+Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
protected static char[] |
+encodeHex(ByteBuffer data,
+ char[] toDigits)
+Converts a byte buffer into an array of characters representing the hexadecimal values of each byte in order.
+ |
+
static String |
+encodeHexString(byte[] data)
+Converts an array of bytes into a String representing the hexadecimal values of each byte in order.
+ |
+
static String |
+encodeHexString(byte[] data,
+ boolean toLowerCase)
+Converts an array of bytes into a String representing the hexadecimal values of each byte in order.
+ |
+
static String |
+encodeHexString(ByteBuffer data)
+Converts a byte buffer into a String representing the hexadecimal values of each byte in order.
+ |
+
static String |
+encodeHexString(ByteBuffer data,
+ boolean toLowerCase)
+Converts a byte buffer into a String representing the hexadecimal values of each byte in order.
+ |
+
Charset |
+getCharset()
+Gets the charset.
+ |
+
String |
+getCharsetName()
+Gets the charset name.
+ |
+
protected static int |
+toDigit(char ch,
+ int index)
+Converts a hexadecimal character to an integer.
+ |
+
String |
+toString()
+Returns a string representation of the object, which includes the charset name.
+ |
+
public static final Charset DEFAULT_CHARSET+
Charsets.UTF_8
public static final String DEFAULT_CHARSET_NAME+
CharEncoding.UTF_8
public Hex()+
DEFAULT_CHARSET
public Hex(Charset charset)+
charset
- the charset.public Hex(String charsetName)+
charsetName
- the charset name.UnsupportedCharsetException
- If the named charset is unavailablepublic static byte[] decodeHex(String data) + throws DecoderException+
data
- A String containing hexadecimal digitsDecoderException
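- Thrown if an odd number of characters or an illegal character is supplied</dd> </dl> </li> </ul> <a name="decodeHex-char:A-"> <!-- --> </a> <ul class="blockList"> <li class="blockList"> <h4>decodeHex</h4> <pre>public static byte[] decodeHex(char[] data) + throws DecoderException</pre>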
data
- An array of characters containing hexadecimal digitsDecoderException
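- Thrown if an odd number of characters or an illegal character is supplied</dd> </dl> </li> </ul> <a name="encodeHex-byte:A-"> <!-- --> </a> <ul class="blockList"> <li class="blockList"> <h4>encodeHex</h4> <pre>public static char[] encodeHex(byte[] data)</pre>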
data
- a byte[] to convert to Hex characterspublic static char[] encodeHex(ByteBuffer data)+
data
- a byte buffer to convert to Hex characterspublic static char[] encodeHex(byte[] data, + boolean toLowerCase)+
data
- a byte[] to convert to Hex characterstoLowerCase
- true
converts to lowercase, false
to uppercasepublic static char[] encodeHex(ByteBuffer data, + boolean toLowerCase)+
data
- a byte buffer to convert to Hex characterstoLowerCase
- true
converts to lowercase, false
to uppercaseprotected static char[] encodeHex(byte[] data, + char[] toDigits)+
data
- a byte[] to convert to Hex characterstoDigits
- the output alphabet (must contain at least 16 chars)protected static char[] encodeHex(ByteBuffer data, + char[] toDigits)+
data
- a byte buffer to convert to Hex characterstoDigits
- the output alphabet (must be at least 16 characters)public static String encodeHexString(byte[] data)+
data
- a byte[] to convert to Hex characterspublic static String encodeHexString(byte[] data, + boolean toLowerCase)+
data
- a byte[] to convert to Hex characterstoLowerCase
- true
converts to lowercase, false
to uppercasepublic static String encodeHexString(ByteBuffer data)+
data
- a byte buffer to convert to Hex characterspublic static String encodeHexString(ByteBuffer data, + boolean toLowerCase)+
data
- a byte buffer to convert to Hex characterstoLowerCase
- true
converts to lowercase, false
to uppercaseprotected static int toDigit(char ch, + int index) + throws DecoderException+
ch
- A character to convert to an integer digitindex
- The index of the character in the sourceDecoderException
- Thrown if ch is an illegal hex characterpublic byte[] decode(byte[] array) + throws DecoderException+
decode
in interface BinaryDecoder
array
- An array of character bytes containing hexadecimal digitsDecoderException
- Thrown if an odd number of characters is supplied to this functiondecodeHex(char[])
public byte[] decode(ByteBuffer buffer) + throws DecoderException+
buffer
- An array of character bytes containing hexadecimal digitsDecoderException
- Thrown if an odd number of characters is supplied to this functiondecodeHex(char[])
public Object decode(Object object) + throws DecoderException+
decode
in interface Decoder
object
- A String, ByteBuffer, byte[], or an array of character bytes containing hexadecimal digitsDecoderException
- Thrown if an odd number of characters is supplied to this function or the object is not a String or
+ char[]decodeHex(char[])
public byte[] encode(byte[] array)+
+ The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
+ getCharset()
.
+
encode
in interface BinaryEncoder
array
- a byte[] to convert to Hex charactersencodeHex(byte[])
public byte[] encode(ByteBuffer array)+
+ The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
+ getCharset()
.
+
array
- a byte buffer to convert to Hex charactersencodeHex(byte[])
public Object encode(Object object) + throws EncoderException+
+ The conversion from hexadecimal characters to bytes to be encoded is performed with the charset named by
+ getCharset()
.
+
encode
in interface Encoder
object
- a String, ByteBuffer, or byte[] to convert to Hex charactersEncoderException
- Thrown if the given object is not a String, ByteBuffer, or byte[]encodeHex(byte[])
public Charset getCharset()+
public String getCharsetName()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/StringUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/StringUtils.html new file mode 100644 index 0000000..22105df --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/StringUtils.html @@ -0,0 +1,787 @@ + + + + + + +public class StringUtils +extends Object+
This class is immutable and thread-safe.
CharEncoding
,
+Standard charsetsConstructor and Description | +
---|
StringUtils() |
+
Modifier and Type | +Method and Description | +
---|---|
static boolean |
+equals(CharSequence cs1,
+ CharSequence cs2)
+
+ Compares two CharSequences, returning
+true if they represent equal sequences of characters. |
+
static ByteBuffer |
+getByteBufferUtf8(String string)
+Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesIso8859_1(String string)
+Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
+ byte array.
+ |
+
static byte[] |
+getBytesUnchecked(String string,
+ String charsetName)
+Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesUsAscii(String string)
+Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesUtf16(String string)
+Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesUtf16Be(String string)
+Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesUtf16Le(String string)
+Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
+ array.
+ |
+
static byte[] |
+getBytesUtf8(String string)
+Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
+ array.
+ |
+
static String |
+newString(byte[] bytes,
+ String charsetName)
+Constructs a new
+String by decoding the specified array of bytes using the given charset. |
+
static String |
+newStringIso8859_1(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the ISO-8859-1 charset. |
+
static String |
+newStringUsAscii(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the US-ASCII charset. |
+
static String |
+newStringUtf16(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the UTF-16 charset. |
+
static String |
+newStringUtf16Be(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the UTF-16BE charset. |
+
static String |
+newStringUtf16Le(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the UTF-16LE charset. |
+
static String |
+newStringUtf8(byte[] bytes)
+Constructs a new
+String by decoding the specified array of bytes using the UTF-8 charset. |
+
public static boolean equals(CharSequence cs1, + CharSequence cs2)+
+ Compares two CharSequences, returning true
if they represent equal sequences of characters.
+
+ null
s are handled without exceptions. Two null
references are considered to be equal.
+ The comparison is case sensitive.
+
+ StringUtils.equals(null, null) = true + StringUtils.equals(null, "abc") = false + StringUtils.equals("abc", null) = false + StringUtils.equals("abc", "abc") = true + StringUtils.equals("abc", "ABC") = false ++ +
+ Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). +
cs1
- the first CharSequence, may be null
cs2
- the second CharSequence, may be null
true
if the CharSequences are equal (case-sensitive), or both null
Object.equals(Object)
public static ByteBuffer getByteBufferUtf8(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.UTF_8
is not initialized, which should never happen since it is
+ required by the Java platform specification.getBytesUnchecked(String, String)
public static byte[] getBytesIso8859_1(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.ISO_8859_1
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static byte[] getBytesUnchecked(String string, + String charsetName)+
+ This method catches UnsupportedEncodingException
and rethrows it as IllegalStateException
, which
+ should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
+
string
- the String to encode, may be null
charsetName
- The name of a required Charset
null
if the input string was null
IllegalStateException
- Thrown when a UnsupportedEncodingException
is caught, which should never happen for a
+ required charset name.CharEncoding
,
+String.getBytes(String)
public static byte[] getBytesUsAscii(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.US_ASCII
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static byte[] getBytesUtf16(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.UTF_16
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static byte[] getBytesUtf16Be(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.UTF_16BE
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static byte[] getBytesUtf16Le(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.UTF_16LE
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static byte[] getBytesUtf8(String string)+
string
- the String to encode, may be null
null
if the input string was null
NullPointerException
- Thrown if Charsets.UTF_8
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptiongetBytesUnchecked(String, String)
public static String newString(byte[] bytes, + String charsetName)+
String
by decoding the specified array of bytes using the given charset.
+
+ This method catches UnsupportedEncodingException
and re-throws it as IllegalStateException
, which
+ should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
+
bytes
- The bytes to be decoded into characters, may be null
charsetName
- The name of a required Charset
String
decoded from the specified array of bytes using the given charset,
+ or null
if the input byte array was null
.IllegalStateException
- Thrown when a UnsupportedEncodingException
is caught, which should never happen for a
+ required charset name.CharEncoding
,
+String.String(byte[], String)
public static String newStringIso8859_1(byte[] bytes)+
String
by decoding the specified array of bytes using the ISO-8859-1 charset.bytes
- The bytes to be decoded into characters, may be null
String
decoded from the specified array of bytes using the ISO-8859-1 charset, or
+ null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.ISO_8859_1
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionpublic static String newStringUsAscii(byte[] bytes)+
String
by decoding the specified array of bytes using the US-ASCII charset.bytes
- The bytes to be decoded into charactersString
decoded from the specified array of bytes using the US-ASCII charset,
+ or null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.US_ASCII
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionpublic static String newStringUtf16(byte[] bytes)+
String
by decoding the specified array of bytes using the UTF-16 charset.bytes
- The bytes to be decoded into charactersString
decoded from the specified array of bytes using the UTF-16 charset
+ or null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.UTF_16
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionpublic static String newStringUtf16Be(byte[] bytes)+
String
by decoding the specified array of bytes using the UTF-16BE charset.bytes
- The bytes to be decoded into charactersString
decoded from the specified array of bytes using the UTF-16BE charset,
+ or null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.UTF_16BE
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionpublic static String newStringUtf16Le(byte[] bytes)+
String
by decoding the specified array of bytes using the UTF-16LE charset.bytes
- The bytes to be decoded into charactersString
decoded from the specified array of bytes using the UTF-16LE charset,
+ or null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.UTF_16LE
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionpublic static String newStringUtf8(byte[] bytes)+
String
by decoding the specified array of bytes using the UTF-8 charset.bytes
- The bytes to be decoded into charactersString
decoded from the specified array of bytes using the UTF-8 charset,
+ or null
if the input byte array was null
.NullPointerException
- Thrown if Charsets.UTF_8
is not initialized, which should never happen since it is
+ required by the Java platform specification.NullPointerException
instead of UnsupportedEncodingExceptionCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32.html new file mode 100644 index 0000000..c8c3544 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32InputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32InputStream.html new file mode 100644 index 0000000..05e6b95 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32InputStream.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32OutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32OutputStream.html new file mode 100644 index 0000000..2b562cb --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base32OutputStream.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64.html new file mode 100644 index 0000000..0142ddb --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64InputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64InputStream.html new file mode 100644 index 0000000..add5e2c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64InputStream.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64OutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64OutputStream.html new file mode 100644 index 0000000..f173ddf --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Base64OutputStream.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodec.html new file mode 100644 index 0000000..ecc026c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodec.html @@ -0,0 +1,193 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32
+Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
class |
+Base64
+Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
Constructor and Description | +
---|
BaseNCodecInputStream(InputStream in,
+ BaseNCodec baseNCodec,
+ boolean doEncode) |
+
BaseNCodecOutputStream(OutputStream out,
+ BaseNCodec basedCodec,
+ boolean doEncode) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecInputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecInputStream.html new file mode 100644 index 0000000..f6e0b58 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecInputStream.html @@ -0,0 +1,175 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32InputStream
+Provides Base32 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
class |
+Base64InputStream
+Provides Base64 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecOutputStream.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecOutputStream.html new file mode 100644 index 0000000..055c5c1 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BaseNCodecOutputStream.html @@ -0,0 +1,175 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32OutputStream
+Provides Base32 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
class |
+Base64OutputStream
+Provides Base64 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BinaryCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BinaryCodec.html new file mode 100644 index 0000000..4b24bfa --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/BinaryCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/CharSequenceUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/CharSequenceUtils.html new file mode 100644 index 0000000..becff3d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/CharSequenceUtils.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Hex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Hex.html new file mode 100644 index 0000000..34c0622 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/Hex.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/StringUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/StringUtils.html new file mode 100644 index 0000000..1c1c562 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/class-use/StringUtils.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-frame.html new file mode 100644 index 0000000..54de929 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-frame.html @@ -0,0 +1,32 @@ + + + + + + +See: Description
+Class | +Description | +
---|---|
Base32 | +
+ Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
Base32InputStream | +
+ Provides Base32 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
Base32OutputStream | +
+ Provides Base32 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
Base64 | +
+ Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
Base64InputStream | +
+ Provides Base64 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
Base64OutputStream | +
+ Provides Base64 encoding and decoding in a streaming fashion (unlimited size).
+ |
+
BaseNCodec | +
+ Abstract superclass for Base-N encoders and decoders.
+ |
+
BaseNCodecInputStream | +
+ Abstract superclass for Base-N input streams.
+ |
+
BaseNCodecOutputStream | +
+ Abstract superclass for Base-N output streams.
+ |
+
BinaryCodec | +
+ Converts between byte arrays and strings of "0"s and "1"s.
+ |
+
CharSequenceUtils | +
+
+ Operations on
+CharSequence that are null safe. |
+
Hex | +
+ Converts hexadecimal Strings.
+ |
+
StringUtils | +
+ Converts String to and from bytes using the encodings required by the Java specification.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-tree.html new file mode 100644 index 0000000..d1e8be1 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-tree.html @@ -0,0 +1,175 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-use.html new file mode 100644 index 0000000..870a565 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/binary/package-use.html @@ -0,0 +1,172 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
Class and Description | +
---|
BaseNCodec
+ Abstract superclass for Base-N encoders and decoders.
+ |
+
BaseNCodecInputStream
+ Abstract superclass for Base-N input streams.
+ |
+
BaseNCodecOutputStream
+ Abstract superclass for Base-N output streams.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryDecoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryDecoder.html new file mode 100644 index 0000000..85d2688 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryDecoder.html @@ -0,0 +1,232 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32
+Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
class |
+Base64
+Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
class |
+BaseNCodec
+Abstract superclass for Base-N encoders and decoders.
+ |
+
class |
+BinaryCodec
+Converts between byte arrays and strings of "0"s and "1"s.
+ |
+
class |
+Hex
+Converts hexadecimal Strings.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+PercentCodec
+Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryEncoder.html new file mode 100644 index 0000000..5ff4e07 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/BinaryEncoder.html @@ -0,0 +1,232 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32
+Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
class |
+Base64
+Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
class |
+BaseNCodec
+Abstract superclass for Base-N encoders and decoders.
+ |
+
class |
+BinaryCodec
+Converts between byte arrays and strings of "0"s and "1"s.
+ |
+
class |
+Hex
+Converts hexadecimal Strings.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+PercentCodec
+Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/CharEncoding.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/CharEncoding.html new file mode 100644 index 0000000..119b601 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/CharEncoding.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Charsets.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Charsets.html new file mode 100644 index 0000000..7aafb3a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Charsets.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Decoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Decoder.html new file mode 100644 index 0000000..cdb1a1f --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Decoder.html @@ -0,0 +1,280 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Interface and Description | +
---|---|
interface |
+BinaryDecoder
+Defines common decoding methods for byte array decoders.
+ |
+
interface |
+StringDecoder
+Defines common decoding methods for String decoders.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32
+Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
class |
+Base64
+Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
class |
+BaseNCodec
+Abstract superclass for Base-N encoders and decoders.
+ |
+
class |
+BinaryCodec
+Converts between byte arrays and strings of "0"s and "1"s.
+ |
+
class |
+Hex
+Converts hexadecimal Strings.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BCodec
+Identical to the Base64 encoding defined by RFC 1521
+ and allows a character set to be specified.
+ |
+
class |
+PercentCodec
+Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification.
+ |
+
class |
+QCodec
+Similar to the Quoted-Printable content-transfer-encoding defined in
+ RFC 1521 and designed to allow text containing mostly ASCII
+ characters to be decipherable on an ASCII terminal without decoding.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/DecoderException.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/DecoderException.html new file mode 100644 index 0000000..1a95548 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/DecoderException.html @@ -0,0 +1,382 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+BinaryDecoder.decode(byte[] source)
+Decodes a byte array and returns the results as a byte array.
+ |
+
Object |
+Decoder.decode(Object source)
+Decodes an "encoded" Object and returns a "decoded" Object.
+ |
+
String |
+StringDecoder.decode(String source)
+Decodes a String and returns a String.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+Hex.decode(byte[] array)
+Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values.
+ |
+
byte[] |
+Hex.decode(ByteBuffer buffer)
+Converts a buffer of character bytes representing hexadecimal values into an array of bytes of those same values.
+ |
+
Object |
+BinaryCodec.decode(Object ascii)
+Decodes a byte array where each byte represents an ASCII '0' or '1'.
+ |
+
Object |
+Hex.decode(Object object)
+Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those
+ same values.
+ |
+
Object |
+BaseNCodec.decode(Object obj)
+Decodes an Object using the Base-N algorithm.
+ |
+
static byte[] |
+Hex.decodeHex(char[] data)
+Converts an array of characters representing hexadecimal values into an array of bytes of those same values.
+ |
+
static byte[] |
+Hex.decodeHex(String data)
+Converts a String representing hexadecimal values into an array of bytes of those same values.
+ |
+
protected static int |
+Hex.toDigit(char ch,
+ int index)
+Converts a hexadecimal character to an integer.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+URLCodec.decode(byte[] bytes)
+Decodes an array of URL safe 7-bit characters into an array of original bytes.
+ |
+
byte[] |
+PercentCodec.decode(byte[] bytes)
+Decodes bytes encoded with Percent-Encoding based on RFC 3986.
+ |
+
byte[] |
+QuotedPrintableCodec.decode(byte[] bytes)
+Decodes an array of quoted-printable characters into an array of original bytes.
+ |
+
Object |
+BCodec.decode(Object value)
+Decodes a Base64 object into its original form.
+ |
+
Object |
+URLCodec.decode(Object obj)
+Decodes a URL safe object into its original form.
+ |
+
Object |
+PercentCodec.decode(Object obj)
+Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
+ |
+
Object |
+QCodec.decode(Object obj)
+Decodes a quoted-printable object into its original form.
+ |
+
Object |
+QuotedPrintableCodec.decode(Object obj)
+Decodes a quoted-printable object into its original form.
+ |
+
String |
+BCodec.decode(String value)
+Decodes a Base64 string into its original form.
+ |
+
String |
+URLCodec.decode(String str)
+Decodes a URL safe string into its original form using the default string charset.
+ |
+
String |
+QCodec.decode(String str)
+Decodes a quoted-printable string into its original form.
+ |
+
String |
+QuotedPrintableCodec.decode(String sourceStr)
+Decodes a quoted-printable string into its original form using the default string Charset.
+ |
+
String |
+QuotedPrintableCodec.decode(String sourceStr,
+ Charset sourceCharset)
+Decodes a quoted-printable string into its original form using the specified string Charset.
+ |
+
String |
+URLCodec.decode(String str,
+ String charsetName)
+Decodes a URL safe string into its original form using the specified encoding.
+ |
+
String |
+QuotedPrintableCodec.decode(String sourceStr,
+ String sourceCharset)
+Decodes a quoted-printable string into its original form using the specified string Charset.
+ |
+
static byte[] |
+QuotedPrintableCodec.decodeQuotedPrintable(byte[] bytes)
+Decodes an array of quoted-printable characters into an array of original bytes.
+ |
+
static byte[] |
+URLCodec.decodeUrl(byte[] bytes)
+Decodes an array of URL safe 7-bit characters into an array of original bytes.
+ |
+
protected byte[] |
+QCodec.doDecoding(byte[] bytes) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Encoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Encoder.html new file mode 100644 index 0000000..e433707 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/Encoder.html @@ -0,0 +1,400 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Interface and Description | +
---|---|
interface |
+BinaryEncoder
+Defines common encoding methods for byte array encoders.
+ |
+
interface |
+StringEncoder
+Defines common encoding methods for String encoders.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Base32
+Provides Base32 encoding and decoding as defined by RFC 4648.
+ |
+
class |
+Base64
+Provides Base64 encoding and decoding as defined by RFC 2045.
+ |
+
class |
+BaseNCodec
+Abstract superclass for Base-N encoders and decoders.
+ |
+
class |
+BinaryCodec
+Converts between byte arrays and strings of "0"s and "1"s.
+ |
+
class |
+Hex
+Converts hexadecimal Strings.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+AbstractCaverphone
+Encodes a string into a Caverphone value.
+ |
+
class |
+Caverphone
+Deprecated.
+
+1.5 Replaced by
+Caverphone2 , will be removed in 2.0. |
+
class |
+Caverphone1
+Encodes a string into a Caverphone 1.0 value.
+ |
+
class |
+Caverphone2
+Encodes a string into a Caverphone 2.0 value.
+ |
+
class |
+ColognePhonetic
+Encodes a string into a Cologne Phonetic value.
+ |
+
class |
+DaitchMokotoffSoundex
+Encodes a string into a Daitch-Mokotoff Soundex value.
+ |
+
class |
+DoubleMetaphone
+Encodes a string into a double metaphone value.
+ |
+
class |
+MatchRatingApproachEncoder
+Match Rating Approach Phonetic Algorithm Developed by Western Airlines in 1977.
+ |
+
class |
+Metaphone
+Encodes a string into a Metaphone value.
+ |
+
class |
+Nysiis
+Encodes a string into a NYSIIS value.
+ |
+
class |
+RefinedSoundex
+Encodes a string into a Refined Soundex value.
+ |
+
class |
+Soundex
+Encodes a string into a Soundex value.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BeiderMorseEncoder
+Encodes strings into their Beider-Morse phonetic encoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BCodec
+Identical to the Base64 encoding defined by RFC 1521
+ and allows a character set to be specified.
+ |
+
class |
+PercentCodec
+Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification.
+ |
+
class |
+QCodec
+Similar to the Quoted-Printable content-transfer-encoding defined in
+ RFC 1521 and designed to allow text containing mostly ASCII
+ characters to be decipherable on an ASCII terminal without decoding.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/EncoderException.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/EncoderException.html new file mode 100644 index 0000000..aece267 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/EncoderException.html @@ -0,0 +1,459 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+BinaryEncoder.encode(byte[] source)
+Encodes a byte array and returns the encoded data as a byte array.
+ |
+
Object |
+Encoder.encode(Object source)
+Encodes an "Object" and returns the encoded content as an Object.
+ |
+
String |
+StringEncoder.encode(String source)
+Encodes a String and returns a String.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+BinaryCodec.encode(Object raw)
+Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
+ |
+
Object |
+Hex.encode(Object object)
+Converts a String or an array of bytes into an array of characters representing the hexadecimal values of each
+ byte in order.
+ |
+
Object |
+BaseNCodec.encode(Object obj)
+Encodes an Object using the Base-N algorithm.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
int |
+Soundex.difference(String s1,
+ String s2)
+Encodes the Strings and returns the number of characters in the two encoded Strings that are the same.
+ |
+
int |
+RefinedSoundex.difference(String s1,
+ String s2)
+Returns the number of characters in the two encoded Strings that are the
+ same.
+ |
+
Object |
+Caverphone.encode(Object obj)
+Deprecated.
+Encodes an Object using the caverphone algorithm.
+ |
+
Object |
+Nysiis.encode(Object obj)
+Encodes an Object using the NYSIIS algorithm.
+ |
+
Object |
+DaitchMokotoffSoundex.encode(Object obj)
+Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
+ |
+
Object |
+Soundex.encode(Object obj)
+Encodes an Object using the soundex algorithm.
+ |
+
Object |
+MatchRatingApproachEncoder.encode(Object pObject)
+Encodes an Object using the Match Rating Approach algorithm.
+ |
+
Object |
+DoubleMetaphone.encode(Object obj)
+Encodes the value using DoubleMetaphone.
+ |
+
Object |
+Metaphone.encode(Object obj)
+Encodes an Object using the metaphone algorithm.
+ |
+
Object |
+AbstractCaverphone.encode(Object source)
+Encodes an Object using the caverphone algorithm.
+ |
+
Object |
+RefinedSoundex.encode(Object obj)
+Encodes an Object using the refined soundex algorithm.
+ |
+
Object |
+ColognePhonetic.encode(Object object) |
+
boolean |
+AbstractCaverphone.isEncodeEqual(String str1,
+ String str2)
+Tests if the encodings of two strings are equal.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+BeiderMorseEncoder.encode(Object source) |
+
String |
+BeiderMorseEncoder.encode(String source) |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+PercentCodec.encode(byte[] bytes)
+Percent-Encoding based on RFC 3986.
+ |
+
Object |
+BCodec.encode(Object value)
+Encodes an object into its Base64 form using the default Charset.
+ |
+
Object |
+URLCodec.encode(Object obj)
+Encodes an object into its URL safe form.
+ |
+
Object |
+PercentCodec.encode(Object obj)
+Encodes an object using Percent-Encoding.
+ |
+
Object |
+QCodec.encode(Object obj)
+Encodes an object into its quoted-printable form using the default Charset.
+ |
+
Object |
+QuotedPrintableCodec.encode(Object obj)
+Encodes an object into its quoted-printable safe form.
+ |
+
String |
+BCodec.encode(String strSource)
+Encodes a string into its Base64 form using the default Charset.
+ |
+
String |
+URLCodec.encode(String str)
+Encodes a string into its URL safe form using the default string charset.
+ |
+
String |
+QCodec.encode(String sourceStr)
+Encodes a string into its quoted-printable form using the default Charset.
+ |
+
String |
+QuotedPrintableCodec.encode(String sourceStr)
+Encodes a string into its quoted-printable form using the default string Charset.
+ |
+
String |
+BCodec.encode(String strSource,
+ Charset sourceCharset)
+Encodes a string into its Base64 form using the specified Charset.
+ |
+
String |
+QCodec.encode(String sourceStr,
+ Charset sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
String |
+BCodec.encode(String strSource,
+ String sourceCharset)
+Encodes a string into its Base64 form using the specified Charset.
+ |
+
String |
+QCodec.encode(String sourceStr,
+ String sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringDecoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringDecoder.html new file mode 100644 index 0000000..4e30341 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringDecoder.html @@ -0,0 +1,191 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BCodec
+Identical to the Base64 encoding defined by RFC 1521
+ and allows a character set to be specified.
+ |
+
class |
+QCodec
+Similar to the Quoted-Printable content-transfer-encoding defined in
+ RFC 1521 and designed to allow text containing mostly ASCII
+ characters to be decipherable on an ASCII terminal without decoding.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoder.html new file mode 100644 index 0000000..bf70433 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoder.html @@ -0,0 +1,336 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Constructor and Description | +
---|
StringEncoderComparator(StringEncoder stringEncoder)
+Constructs a new instance with the given algorithm.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+AbstractCaverphone
+Encodes a string into a Caverphone value.
+ |
+
class |
+Caverphone
+Deprecated.
+
+1.5 Replaced by
+Caverphone2 , will be removed in 2.0. |
+
class |
+Caverphone1
+Encodes a string into a Caverphone 1.0 value.
+ |
+
class |
+Caverphone2
+Encodes a string into a Caverphone 2.0 value.
+ |
+
class |
+ColognePhonetic
+Encodes a string into a Cologne Phonetic value.
+ |
+
class |
+DaitchMokotoffSoundex
+Encodes a string into a Daitch-Mokotoff Soundex value.
+ |
+
class |
+DoubleMetaphone
+Encodes a string into a double metaphone value.
+ |
+
class |
+MatchRatingApproachEncoder
+Match Rating Approach Phonetic Algorithm Developed by Western Airlines in 1977.
+ |
+
class |
+Metaphone
+Encodes a string into a Metaphone value.
+ |
+
class |
+Nysiis
+Encodes a string into a NYSIIS value.
+ |
+
class |
+RefinedSoundex
+Encodes a string into a Refined Soundex value.
+ |
+
class |
+Soundex
+Encodes a string into a Soundex value.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BeiderMorseEncoder
+Encodes strings into their Beider-Morse phonetic encoding.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+BCodec
+Identical to the Base64 encoding defined by RFC 1521
+ and allows a character set to be specified.
+ |
+
class |
+QCodec
+Similar to the Quoted-Printable content-transfer-encoding defined in
+ RFC 1521 and designed to allow text containing mostly ASCII
+ characters to be decipherable on an ASCII terminal without decoding.
+ |
+
class |
+QuotedPrintableCodec
+Codec for the Quoted-Printable section of RFC 1521.
+ |
+
class |
+URLCodec
+Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoderComparator.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoderComparator.html new file mode 100644 index 0000000..164e876 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/class-use/StringEncoderComparator.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/Digest.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/Digest.html new file mode 100644 index 0000000..c6c814c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/Digest.html @@ -0,0 +1,279 @@ + + + + + + +public class Digest +extends Object+
main(String[])
Modifier and Type | +Method and Description | +
---|---|
static void |
+main(String[] args)
+Runs the digest algorithm in
+args[0] on the file in args[1] . |
+
String |
+toString() |
+
public static void main(String[] args) + throws IOException+
args[0]
on the file in args[1]
. If there is no args[1]
, use
+ standard input.
+
+
+ The algorithm can also be ALL
or *
to output one line for each known algorithm.
+
args
- args[0]
is one of MessageDigestAlgorithms
name,
+ MessageDigest
name, ALL
, or *
.
+ args[1+]
is a FILE/DIRECTORY/String.IOException
- if an error occursCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/class-use/Digest.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/class-use/Digest.html new file mode 100644 index 0000000..ff851b5 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/class-use/Digest.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-frame.html new file mode 100644 index 0000000..870f6c1 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-frame.html @@ -0,0 +1,20 @@ + + + + + + +Class | +Description | +
---|---|
Digest | +
+ A minimal command line to run digest over files, directories or a string
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-tree.html new file mode 100644 index 0000000..5b73313 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-tree.html @@ -0,0 +1,138 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-use.html new file mode 100644 index 0000000..5b149b3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/cli/package-use.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Crypt.html new file mode 100644 index 0000000..c85a9b5 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Crypt.html @@ -0,0 +1,441 @@ + + + + + + +public class Crypt +extends Object+
+ See crypt(String, String)
for further details.
+
+ This class is immutable and thread-safe.
Modifier and Type | +Method and Description | +
---|---|
static String |
+crypt(byte[] keyBytes)
+Encrypts a password in a crypt(3) compatible way.
+ |
+
static String |
+crypt(byte[] keyBytes,
+ String salt)
+Encrypts a password in a crypt(3) compatible way.
+ |
+
static String |
+crypt(String key)
+Calculates the digest using the strongest crypt(3) algorithm.
+ |
+
static String |
+crypt(String key,
+ String salt)
+Encrypts a password in a crypt(3) compatible way.
+ |
+
public static String crypt(byte[] keyBytes)+
+ A random salt and the default algorithm (currently SHA-512) are used. See crypt(String, String)
for
+ details.
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling crypt(byte[], String)
.
+
keyBytes
- plaintext passwordIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String crypt(byte[] keyBytes, + String salt)+
+ If no salt is provided, a random salt and the default algorithm (currently SHA-512) will be used. See
+ crypt(String, String)
for details.
keyBytes
- plaintext passwordsalt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using ThreadLocalRandom
; for more secure salts consider using SecureRandom
to
+ generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String crypt(String key)+
+ A random salt and the default algorithm (currently SHA-512) are used. +
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling crypt(String, String)
.
+
key
- plaintext passwordIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.crypt(String, String)
public static String crypt(String key, + String salt)+
+ The exact algorithm depends on the format of the salt string: +
$6$
and are up to 16 chars long.
+ $5$
and are up to 16 chars long
+ $1$
and are up to 8 chars long
+ "$apr1$"
and "$2a$"
are not recognized by this method as its output should be
+ identical with that of the libc implementation.
+
+ The rest of the salt string is drawn from the set [a-zA-Z0-9./]
and is cut at the maximum length or if a
+ "$"
sign is encountered. It is therefore valid to enter a complete hash value as salt to e.g. verify a
+ password with:
+
+
+ storedPwd.equals(crypt(enteredPwd, storedPwd)) ++
+ The resulting string starts with the marker string ($n$
), where n is the same as the input salt.
+ The salt is then appended, followed by a "$"
sign.
+ This is followed by the actual hash value.
+ For DES the string only contains the salt and actual hash.
+ The total length is dependent on the algorithm used:
+
+ Example: + +
+ crypt("secret", "$1$xxxx") => "$1$xxxx$aMkevjfEIpa35Bh3G4bAc." + crypt("secret", "xx") => "xxWAum7tHdIUw" ++
+ This method comes in a variation that accepts a byte[] array to support input strings that are not encoded in + UTF-8 but e.g. in ISO-8859-1 where equal characters result in different byte values.
key
- plaintext password as entered by the user
salt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using ThreadLocalRandom
; for more secure salts consider using SecureRandom
to
+ generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught. Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/DigestUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/DigestUtils.html new file mode 100644 index 0000000..99cf783 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/DigestUtils.html @@ -0,0 +1,2906 @@ + + + + + + +public class DigestUtils +extends Object+
MessageDigest
tasks.
+ This class is immutable and thread-safe.
+ However the MessageDigest instances it creates generally won't be.
+
+ The MessageDigestAlgorithms
class provides constants for standard
+ digest algorithms that can be used with the getDigest(String)
method
+ and other methods that require the Digest algorithm name.
+
+ Note: the class has short-hand methods for all the algorithms present as standard in Java 6. + This approach requires lots of methods for each algorithm, and quickly becomes unwieldy. + The following code works with all algorithms: +
+ import static org.apache.commons.codec.digest.MessageDigestAlgorithms.SHA_224; + ... + byte [] digest = new DigestUtils(SHA_224).digest(dataToDigest); + String hdigest = new DigestUtils(SHA_224).digestAsHex(new File("pom.xml")); +
MessageDigestAlgorithms
Constructor and Description | +
---|
DigestUtils()
+Deprecated.
+
+since 1.11; only useful to preserve binary compatibility
+ |
+
DigestUtils(MessageDigest digest)
+Creates an instance using the provided
+MessageDigest parameter. |
+
DigestUtils(String name)
+Creates an instance using the name of the
+MessageDigest to use. |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+digest(byte[] data)
+Reads through a byte array and returns the digest for the data.
+ |
+
byte[] |
+digest(ByteBuffer data)
+Reads through a ByteBuffer and returns the digest for the data
+ |
+
byte[] |
+digest(File data)
+Reads through a File and returns the digest for the data
+ |
+
byte[] |
+digest(InputStream data)
+Reads through an InputStream and returns the digest for the data
+ |
+
static byte[] |
+digest(MessageDigest messageDigest,
+ byte[] data)
+Reads through a byte array and returns the digest for the data.
+ |
+
static byte[] |
+digest(MessageDigest messageDigest,
+ ByteBuffer data)
+Reads through a ByteBuffer and returns the digest for the data
+ |
+
static byte[] |
+digest(MessageDigest messageDigest,
+ File data)
+Reads through a File and returns the digest for the data
+ |
+
static byte[] |
+digest(MessageDigest messageDigest,
+ InputStream data)
+Reads through an InputStream and returns the digest for the data
+ |
+
byte[] |
+digest(String data)
+Reads through a String and returns the digest for the data.
+ |
+
String |
+digestAsHex(byte[] data)
+Reads through a byte array and returns the digest for the data.
+ |
+
String |
+digestAsHex(ByteBuffer data)
+Reads through a ByteBuffer and returns the digest for the data
+ |
+
String |
+digestAsHex(File data)
+Reads through a File and returns the digest for the data
+ |
+
String |
+digestAsHex(InputStream data)
+Reads through an InputStream and returns the digest for the data
+ |
+
String |
+digestAsHex(String data)
+Reads through a String and returns the digest for the data.
+ |
+
static MessageDigest |
+getDigest(String algorithm)
+Returns a
+MessageDigest for the given algorithm . |
+
static MessageDigest |
+getDigest(String algorithm,
+ MessageDigest defaultMessageDigest)
+Returns a
+MessageDigest for the given algorithm or a default if there is a problem
+ getting the algorithm. |
+
static MessageDigest |
+getMd2Digest()
+Returns an MD2 MessageDigest.
+ |
+
static MessageDigest |
+getMd5Digest()
+Returns an MD5 MessageDigest.
+ |
+
MessageDigest |
+getMessageDigest()
+Returns the message digest instance.
+ |
+
static MessageDigest |
+getSha1Digest()
+Returns an SHA-1 digest.
+ |
+
static MessageDigest |
+getSha256Digest()
+Returns an SHA-256 digest.
+ |
+
static MessageDigest |
+getSha3_224Digest()
+Returns an SHA3-224 digest.
+ |
+
static MessageDigest |
+getSha3_256Digest()
+Returns an SHA3-256 digest.
+ |
+
static MessageDigest |
+getSha3_384Digest()
+Returns an SHA3-384 digest.
+ |
+
static MessageDigest |
+getSha3_512Digest()
+Returns an SHA3-512 digest.
+ |
+
static MessageDigest |
+getSha384Digest()
+Returns an SHA-384 digest.
+ |
+
static MessageDigest |
+getSha512Digest()
+Returns an SHA-512 digest.
+ |
+
static MessageDigest |
+getShaDigest()
+Deprecated.
+
+(1.11) Use
+getSha1Digest() |
+
static boolean |
+isAvailable(String messageDigestAlgorithm)
+Test whether the algorithm is supported.
+ |
+
static byte[] |
+md2(byte[] data)
+Calculates the MD2 digest and returns the value as a 16 element
+byte[] . |
+
static byte[] |
+md2(InputStream data)
+Calculates the MD2 digest and returns the value as a 16 element
+byte[] . |
+
static byte[] |
+md2(String data)
+Calculates the MD2 digest and returns the value as a 16 element
+byte[] . |
+
static String |
+md2Hex(byte[] data)
+Calculates the MD2 digest and returns the value as a 32 character hex string.
+ |
+
static String |
+md2Hex(InputStream data)
+Calculates the MD2 digest and returns the value as a 32 character hex string.
+ |
+
static String |
+md2Hex(String data)
+Calculates the MD2 digest and returns the value as a 32 character hex string.
+ |
+
static byte[] |
+md5(byte[] data)
+Calculates the MD5 digest and returns the value as a 16 element
+byte[] . |
+
static byte[] |
+md5(InputStream data)
+Calculates the MD5 digest and returns the value as a 16 element
+byte[] . |
+
static byte[] |
+md5(String data)
+Calculates the MD5 digest and returns the value as a 16 element
+byte[] . |
+
static String |
+md5Hex(byte[] data)
+Calculates the MD5 digest and returns the value as a 32 character hex string.
+ |
+
static String |
+md5Hex(InputStream data)
+Calculates the MD5 digest and returns the value as a 32 character hex string.
+ |
+
static String |
+md5Hex(String data)
+Calculates the MD5 digest and returns the value as a 32 character hex string.
+ |
+
static byte[] |
+sha(byte[] data)
+Deprecated.
+
+(1.11) Use
+sha1(byte[]) |
+
static byte[] |
+sha(InputStream data)
+Deprecated.
+
+(1.11) Use
+sha1(InputStream) |
+
static byte[] |
+sha(String data)
+Deprecated.
+
+(1.11) Use
+sha1(String) |
+
static byte[] |
+sha1(byte[] data)
+Calculates the SHA-1 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha1(InputStream data)
+Calculates the SHA-1 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha1(String data)
+Calculates the SHA-1 digest and returns the value as a
+byte[] . |
+
static String |
+sha1Hex(byte[] data)
+Calculates the SHA-1 digest and returns the value as a hex string.
+ |
+
static String |
+sha1Hex(InputStream data)
+Calculates the SHA-1 digest and returns the value as a hex string.
+ |
+
static String |
+sha1Hex(String data)
+Calculates the SHA-1 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha256(byte[] data)
+Calculates the SHA-256 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha256(InputStream data)
+Calculates the SHA-256 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha256(String data)
+Calculates the SHA-256 digest and returns the value as a
+byte[] . |
+
static String |
+sha256Hex(byte[] data)
+Calculates the SHA-256 digest and returns the value as a hex string.
+ |
+
static String |
+sha256Hex(InputStream data)
+Calculates the SHA-256 digest and returns the value as a hex string.
+ |
+
static String |
+sha256Hex(String data)
+Calculates the SHA-256 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha3_224(byte[] data)
+Calculates the SHA3-224 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_224(InputStream data)
+Calculates the SHA3-224 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_224(String data)
+Calculates the SHA3-224 digest and returns the value as a
+byte[] . |
+
static String |
+sha3_224Hex(byte[] data)
+Calculates the SHA3-224 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_224Hex(InputStream data)
+Calculates the SHA3-224 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_224Hex(String data)
+Calculates the SHA3-224 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha3_256(byte[] data)
+Calculates the SHA3-256 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_256(InputStream data)
+Calculates the SHA3-256 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_256(String data)
+Calculates the SHA3-256 digest and returns the value as a
+byte[] . |
+
static String |
+sha3_256Hex(byte[] data)
+Calculates the SHA3-256 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_256Hex(InputStream data)
+Calculates the SHA3-256 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_256Hex(String data)
+Calculates the SHA3-256 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha3_384(byte[] data)
+Calculates the SHA3-384 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_384(InputStream data)
+Calculates the SHA3-384 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_384(String data)
+Calculates the SHA3-384 digest and returns the value as a
+byte[] . |
+
static String |
+sha3_384Hex(byte[] data)
+Calculates the SHA3-384 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_384Hex(InputStream data)
+Calculates the SHA3-384 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_384Hex(String data)
+Calculates the SHA3-384 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha3_512(byte[] data)
+Calculates the SHA3-512 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_512(InputStream data)
+Calculates the SHA3-512 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha3_512(String data)
+Calculates the SHA3-512 digest and returns the value as a
+byte[] . |
+
static String |
+sha3_512Hex(byte[] data)
+Calculates the SHA3-512 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_512Hex(InputStream data)
+Calculates the SHA3-512 digest and returns the value as a hex string.
+ |
+
static String |
+sha3_512Hex(String data)
+Calculates the SHA3-512 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha384(byte[] data)
+Calculates the SHA-384 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha384(InputStream data)
+Calculates the SHA-384 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha384(String data)
+Calculates the SHA-384 digest and returns the value as a
+byte[] . |
+
static String |
+sha384Hex(byte[] data)
+Calculates the SHA-384 digest and returns the value as a hex string.
+ |
+
static String |
+sha384Hex(InputStream data)
+Calculates the SHA-384 digest and returns the value as a hex string.
+ |
+
static String |
+sha384Hex(String data)
+Calculates the SHA-384 digest and returns the value as a hex string.
+ |
+
static byte[] |
+sha512(byte[] data)
+Calculates the SHA-512 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha512(InputStream data)
+Calculates the SHA-512 digest and returns the value as a
+byte[] . |
+
static byte[] |
+sha512(String data)
+Calculates the SHA-512 digest and returns the value as a
+byte[] . |
+
static String |
+sha512Hex(byte[] data)
+Calculates the SHA-512 digest and returns the value as a hex string.
+ |
+
static String |
+sha512Hex(InputStream data)
+Calculates the SHA-512 digest and returns the value as a hex string.
+ |
+
static String |
+sha512Hex(String data)
+Calculates the SHA-512 digest and returns the value as a hex string.
+ |
+
static String |
+shaHex(byte[] data)
+Deprecated.
+
+(1.11) Use
+sha1Hex(byte[]) |
+
static String |
+shaHex(InputStream data)
+Deprecated.
+
+(1.11) Use
+sha1Hex(InputStream) |
+
static String |
+shaHex(String data)
+Deprecated.
+
+(1.11) Use
+sha1Hex(String) |
+
static MessageDigest |
+updateDigest(MessageDigest messageDigest,
+ byte[] valueToDigest)
+Updates the given
+MessageDigest . |
+
static MessageDigest |
+updateDigest(MessageDigest messageDigest,
+ ByteBuffer valueToDigest)
+Updates the given
+MessageDigest . |
+
static MessageDigest |
+updateDigest(MessageDigest digest,
+ File data)
+Reads through a File and updates the digest for the data
+ |
+
static MessageDigest |
+updateDigest(MessageDigest digest,
+ InputStream data)
+Reads through an InputStream and updates the digest for the data
+ |
+
static MessageDigest |
+updateDigest(MessageDigest messageDigest,
+ String valueToDigest)
+Updates the given
+MessageDigest from a String (converted to bytes using UTF-8). |
+
@Deprecated +public DigestUtils()+
public DigestUtils(MessageDigest digest)+
MessageDigest
parameter.
+
+ This can then be used to create digests using methods such as
+ digest(byte[])
and digestAsHex(File)
.digest
- the MessageDigest
to usepublic DigestUtils(String name)+
MessageDigest
parameter.
+
+ This can then be used to create digests using methods such as
+ digest(byte[])
and digestAsHex(File)
.name
- the name of the MessageDigest
to useIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.getDigest(String)
public static byte[] digest(MessageDigest messageDigest, + byte[] data)+
messageDigest
- The MessageDigest to use (e.g. MD5)data
- Data to digestpublic static byte[] digest(MessageDigest messageDigest, + ByteBuffer data)+
messageDigest
- The MessageDigest to use (e.g. MD5)data
- Data to digestpublic static byte[] digest(MessageDigest messageDigest, + File data) + throws IOException+
messageDigest
- The MessageDigest to use (e.g. MD5)data
- Data to digestIOException
- On error reading from the streampublic static byte[] digest(MessageDigest messageDigest, + InputStream data) + throws IOException+
messageDigest
- The MessageDigest to use (e.g. MD5)data
- Data to digestIOException
- On error reading from the streampublic static MessageDigest getDigest(String algorithm)+
MessageDigest
for the given algorithm
.algorithm
- the name of the algorithm requested. See Appendix A in the Java Cryptography Architecture Reference Guide for information about standard
+ algorithm names.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.MessageDigest.getInstance(String)
public static MessageDigest getDigest(String algorithm, + MessageDigest defaultMessageDigest)+
MessageDigest
for the given algorithm
or a default if there is a problem
+ getting the algorithm.algorithm
- the name of the algorithm requested. See
+
+ Appendix A in the Java Cryptography Architecture Reference Guide for information about standard
+ algorithm names.defaultMessageDigest
- The default MessageDigest.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.MessageDigest.getInstance(String)
public static MessageDigest getMd2Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because MD2 is a
+ built-in algorithmMessageDigestAlgorithms.MD2
public static MessageDigest getMd5Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because MD5 is a
+ built-in algorithmMessageDigestAlgorithms.MD5
public static MessageDigest getSha1Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because SHA-1 is a
+ built-in algorithmMessageDigestAlgorithms.SHA_1
public static MessageDigest getSha256Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because SHA-256 is a
+ built-in algorithmMessageDigestAlgorithms.SHA_256
public static MessageDigest getSha3_224Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should not happen on Oracle Java 9 and greater.MessageDigestAlgorithms.SHA3_224
public static MessageDigest getSha3_256Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should not happen on Oracle Java 9 and greater.MessageDigestAlgorithms.SHA3_256
public static MessageDigest getSha3_384Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should not happen on Oracle Java 9 and greater.MessageDigestAlgorithms.SHA3_384
public static MessageDigest getSha3_512Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should not happen on Oracle Java 9 and greater.MessageDigestAlgorithms.SHA3_512
public static MessageDigest getSha384Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because SHA-384 is a
+ built-in algorithmMessageDigestAlgorithms.SHA_384
public static MessageDigest getSha512Digest()+
IllegalArgumentException
- when a NoSuchAlgorithmException
is caught, which should never happen because SHA-512 is a
+ built-in algorithmMessageDigestAlgorithms.SHA_512
@Deprecated +public static MessageDigest getShaDigest()+
getSha1Digest()
IllegalArgumentException
- when a NoSuchAlgorithmException
is caughtpublic static boolean isAvailable(String messageDigestAlgorithm)+
messageDigestAlgorithm
- the algorithm nametrue
if the algorithm can be foundpublic static byte[] md2(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] md2(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] md2(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String md2Hex(byte[] data)+
data
- Data to digestpublic static String md2Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String md2Hex(String data)+
data
- Data to digestpublic static byte[] md5(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] md5(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] md5(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String md5Hex(byte[] data)+
data
- Data to digestpublic static String md5Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String md5Hex(String data)+
data
- Data to digest@Deprecated +public static byte[] sha(byte[] data)+
sha1(byte[])
byte[]
.data
- Data to digest@Deprecated +public static byte[] sha(InputStream data) + throws IOException+
sha1(InputStream)
byte[]
.data
- Data to digestIOException
- On error reading from the stream@Deprecated +public static byte[] sha(String data)+
sha1(String)
byte[]
.data
- Data to digestpublic static byte[] sha1(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha1(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha1(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha1Hex(byte[] data)+
data
- Data to digestpublic static String sha1Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha1Hex(String data)+
data
- Data to digestpublic static byte[] sha256(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha256(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha256(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha256Hex(byte[] data)+
data
- Data to digestpublic static String sha256Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha256Hex(String data)+
data
- Data to digestpublic static byte[] sha3_224(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha3_224(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha3_224(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha3_224Hex(String data)+
data
- Data to digestpublic static byte[] sha3_256(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha3_256(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha3_256(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha3_256Hex(String data)+
data
- Data to digestpublic static byte[] sha3_384(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha3_384(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha3_384(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha3_384Hex(String data)+
data
- Data to digestpublic static byte[] sha3_512(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha3_512(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha3_512(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha3_512Hex(String data)+
data
- Data to digestpublic static byte[] sha384(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha384(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha384(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha384Hex(byte[] data)+
data
- Data to digestpublic static String sha384Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha384Hex(String data)+
data
- Data to digestpublic static byte[] sha512(byte[] data)+
byte[]
.data
- Data to digestpublic static byte[] sha512(InputStream data) + throws IOException+
byte[]
.data
- Data to digestIOException
- On error reading from the streampublic static byte[] sha512(String data)+
byte[]
.data
- Data to digest; converted to bytes using StringUtils.getBytesUtf8(String)
public static String sha512Hex(byte[] data)+
data
- Data to digestpublic static String sha3_224Hex(byte[] data)+
data
- Data to digestpublic static String sha3_256Hex(byte[] data)+
data
- Data to digestpublic static String sha3_384Hex(byte[] data)+
data
- Data to digestpublic static String sha3_512Hex(byte[] data)+
data
- Data to digestpublic static String sha512Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha3_224Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha3_256Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha3_384Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha3_512Hex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic static String sha512Hex(String data)+
data
- Data to digest@Deprecated +public static String shaHex(byte[] data)+
sha1Hex(byte[])
data
- Data to digest@Deprecated +public static String shaHex(InputStream data) + throws IOException+
sha1Hex(InputStream)
data
- Data to digestIOException
- On error reading from the stream@Deprecated +public static String shaHex(String data)+
sha1Hex(String)
data
- Data to digestpublic static MessageDigest updateDigest(MessageDigest messageDigest, + byte[] valueToDigest)+
MessageDigest
.messageDigest
- the MessageDigest
to updatevalueToDigest
- the value to update the MessageDigest
withMessageDigest
public static MessageDigest updateDigest(MessageDigest messageDigest, + ByteBuffer valueToDigest)+
MessageDigest
.messageDigest
- the MessageDigest
to updatevalueToDigest
- the value to update the MessageDigest
withMessageDigest
public static MessageDigest updateDigest(MessageDigest digest, + File data) + throws IOException+
digest
- The MessageDigest to use (e.g. MD5)data
- Data to digestIOException
- On error reading from the streampublic static MessageDigest updateDigest(MessageDigest digest, + InputStream data) + throws IOException+
digest
- The MessageDigest to use (e.g. MD5)data
- Data to digestIOException
- On error reading from the streampublic static MessageDigest updateDigest(MessageDigest messageDigest, + String valueToDigest)+
MessageDigest
from a String (converted to bytes using UTF-8).
+
+ To update the digest using a different charset for the conversion,
+ convert the String to a byte array using
+ String.getBytes(java.nio.charset.Charset)
and pass that
+ to the updateDigest(MessageDigest, byte[])
method
messageDigest
- the MessageDigest
to updatevalueToDigest
- the value to update the MessageDigest
with;
+ converted to bytes using StringUtils.getBytesUtf8(String)
MessageDigest
public byte[] digest(byte[] data)+
data
- Data to digestpublic byte[] digest(ByteBuffer data)+
data
- Data to digestpublic byte[] digest(File data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic byte[] digest(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic byte[] digest(String data)+
data
- Data to digest treated as UTF-8 stringpublic String digestAsHex(byte[] data)+
data
- Data to digestpublic String digestAsHex(ByteBuffer data)+
data
- Data to digestpublic String digestAsHex(File data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic String digestAsHex(InputStream data) + throws IOException+
data
- Data to digestIOException
- On error reading from the streampublic String digestAsHex(String data)+
data
- Data to digest treated as UTF-8 stringpublic MessageDigest getMessageDigest()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacAlgorithms.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacAlgorithms.html new file mode 100644 index 0000000..73eb680 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacAlgorithms.html @@ -0,0 +1,506 @@ + + + + + + +public enum HmacAlgorithms +extends Enum<HmacAlgorithms>+
HmacUtils
algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation.
+
+ + Note: Not all JCE implementations support all the algorithms in this enum. +
Enum Constant and Description | +
---|
HMAC_MD5
+The HmacMD5 Message Authentication Code (MAC) algorithm specified in RFC 2104 and RFC 1321.
+ |
+
HMAC_SHA_1
+The HmacSHA1 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
+ |
+
HMAC_SHA_224
+The HmacSHA224 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
+ |
+
HMAC_SHA_256
+The HmacSHA256 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
+ |
+
HMAC_SHA_384
+The HmacSHA384 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
+ |
+
HMAC_SHA_512
+The HmacSHA512 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+getName()
+Gets the algorithm name.
+ |
+
String |
+toString()
+The algorithm name
+ |
+
static HmacAlgorithms |
+valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static HmacAlgorithms[] |
+values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
public static final HmacAlgorithms HMAC_MD5+
+ Every implementation of the Java platform is required to support this standard MAC algorithm. +
public static final HmacAlgorithms HMAC_SHA_1+
+ Every implementation of the Java platform is required to support this standard MAC algorithm. +
public static final HmacAlgorithms HMAC_SHA_224+
+ Every implementation of the Java 8+ platform is required to support this standard MAC algorithm. +
public static final HmacAlgorithms HMAC_SHA_256+
+ Every implementation of the Java platform is required to support this standard MAC algorithm. +
public static final HmacAlgorithms HMAC_SHA_384+
+ This MAC algorithm is optional; not all implementations support it. +
public static final HmacAlgorithms HMAC_SHA_512+
+ This MAC algorithm is optional; not all implementations support it. +
public static HmacAlgorithms[] values()+
+for (HmacAlgorithms c : HmacAlgorithms.values()) + System.out.println(c); +
public static HmacAlgorithms valueOf(String name)+
name
- the name of the enum constant to be returned.IllegalArgumentException
- if this enum type has no constant with the specified nameNullPointerException
- if the argument is nullpublic String getName()+
public String toString()+
toString
in class Enum<HmacAlgorithms>
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacUtils.html new file mode 100644 index 0000000..6f5317d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/HmacUtils.html @@ -0,0 +1,2002 @@ + + + + + + +public final class HmacUtils +extends Object+
Mac
tasks. This class is immutable and thread-safe.
+ However the Mac may not be.
+ + Note: Not all JCE implementations support all algorithms. If not supported, an IllegalArgumentException is + thrown. +
+ Sample usage: +
+ import static HmacAlgorithms.*; + byte[] key = {1,2,3,4}; // don't use this actual key! + String valueToDigest = "The quick brown fox jumps over the lazy dog"; + byte[] hmac = new HmacUtils(HMAC_SHA_224, key).hmac(valueToDigest); + // Mac re-use + HmacUtils hm1 = new HmacUtils("HmacAlgoName", key); // use a valid name here! + String hexPom = hm1.hmacHex(new File("pom.xml")); + String hexNot = hm1.hmacHex(new File("NOTICE.txt")); +
Constructor and Description | +
---|
HmacUtils()
+Deprecated.
+
+since 1.11; only useful to preserve binary compatibility
+ |
+
HmacUtils(HmacAlgorithms algorithm,
+ byte[] key)
+Creates an instance using the provided algorithm type.
+ |
+
HmacUtils(HmacAlgorithms algorithm,
+ String key)
+Creates an instance using the provided algorithm type.
+ |
+
HmacUtils(String algorithm,
+ byte[] key)
+Creates an instance using the provided algorithm type.
+ |
+
HmacUtils(String algorithm,
+ String key)
+Creates an instance using the provided algorithm type.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Mac |
+getHmacMd5(byte[] key)
+Deprecated.
+
+(1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_MD5, byte[]) |
+
static Mac |
+getHmacSha1(byte[] key)
+Deprecated.
+
+(1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_1, byte[]) |
+
static Mac |
+getHmacSha256(byte[] key)
+Deprecated.
+
+(1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_256, byte[]) |
+
static Mac |
+getHmacSha384(byte[] key)
+Deprecated.
+
+(1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_384, byte[]) |
+
static Mac |
+getHmacSha512(byte[] key)
+Deprecated.
+
+(1.11) Use
+getInitializedMac(HmacAlgorithms.HMAC_SHA_512, byte[]) |
+
static Mac |
+getInitializedMac(HmacAlgorithms algorithm,
+ byte[] key)
+Returns an initialized
+Mac for the given algorithm . |
+
static Mac |
+getInitializedMac(String algorithm,
+ byte[] key)
+Returns an initialized
+Mac for the given algorithm . |
+
byte[] |
+hmac(byte[] valueToDigest)
+Returns the digest for the input data.
+ |
+
byte[] |
+hmac(ByteBuffer valueToDigest)
+Returns the digest for the input data.
+ |
+
byte[] |
+hmac(File valueToDigest)
+Returns the digest for the file.
+ |
+
byte[] |
+hmac(InputStream valueToDigest)
+Returns the digest for the stream.
+ |
+
byte[] |
+hmac(String valueToDigest)
+Returns the digest for the input data.
+ |
+
String |
+hmacHex(byte[] valueToDigest)
+Returns the digest for the input data.
+ |
+
String |
+hmacHex(ByteBuffer valueToDigest)
+Returns the digest for the input data.
+ |
+
String |
+hmacHex(File valueToDigest)
+Returns the digest for the file.
+ |
+
String |
+hmacHex(InputStream valueToDigest)
+Returns the digest for the stream.
+ |
+
String |
+hmacHex(String valueToDigest)
+Returns the digest for the input data.
+ |
+
static byte[] |
+hmacMd5(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(byte[]) |
+
static byte[] |
+hmacMd5(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(InputStream) |
+
static byte[] |
+hmacMd5(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmac(String) |
+
static String |
+hmacMd5Hex(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(byte[]) |
+
static String |
+hmacMd5Hex(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(InputStream) |
+
static String |
+hmacMd5Hex(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmacHex(String) |
+
static byte[] |
+hmacSha1(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(byte[]) |
+
static byte[] |
+hmacSha1(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(InputStream) |
+
static byte[] |
+hmacSha1(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmac(String) |
+
static String |
+hmacSha1Hex(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(byte[]) |
+
static String |
+hmacSha1Hex(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(InputStream) |
+
static String |
+hmacSha1Hex(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmacHex(String) |
+
static byte[] |
+hmacSha256(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(byte[]) |
+
static byte[] |
+hmacSha256(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(InputStream) |
+
static byte[] |
+hmacSha256(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmac(String) |
+
static String |
+hmacSha256Hex(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(byte[]) |
+
static String |
+hmacSha256Hex(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(InputStream) |
+
static String |
+hmacSha256Hex(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmacHex(String) |
+
static byte[] |
+hmacSha384(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(byte[]) |
+
static byte[] |
+hmacSha384(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(InputStream) |
+
static byte[] |
+hmacSha384(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmac(String) |
+
static String |
+hmacSha384Hex(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(byte[]) |
+
static String |
+hmacSha384Hex(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(InputStream) |
+
static String |
+hmacSha384Hex(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmacHex(String) |
+
static byte[] |
+hmacSha512(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(byte[]) |
+
static byte[] |
+hmacSha512(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(InputStream) |
+
static byte[] |
+hmacSha512(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmac(String) |
+
static String |
+hmacSha512Hex(byte[] key,
+ byte[] valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(byte[]) |
+
static String |
+hmacSha512Hex(byte[] key,
+ InputStream valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(InputStream) |
+
static String |
+hmacSha512Hex(String key,
+ String valueToDigest)
+Deprecated.
+
+(1.11) Use
+new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmacHex(String) |
+
static boolean |
+isAvailable(HmacAlgorithms name)
+Returns whether this algorithm is available
+ |
+
static boolean |
+isAvailable(String name)
+Returns whether this algorithm is available
+ |
+
static Mac |
+updateHmac(Mac mac,
+ byte[] valueToDigest)
+Resets and then updates the given
+Mac with the value. |
+
static Mac |
+updateHmac(Mac mac,
+ InputStream valueToDigest)
+Resets and then updates the given
+Mac with the value. |
+
static Mac |
+updateHmac(Mac mac,
+ String valueToDigest)
+Resets and then updates the given
+Mac with the value. |
+
@Deprecated +public HmacUtils()+
public HmacUtils(String algorithm, + byte[] key)+
algorithm
- to usekey
- the key to useIllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.public HmacUtils(String algorithm, + String key)+
algorithm
- to usekey
- the key to useIllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.public HmacUtils(HmacAlgorithms algorithm, + String key)+
algorithm
- to usekey
- the key to useIllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.public HmacUtils(HmacAlgorithms algorithm, + byte[] key)+
algorithm
- to use.key
- the key to useIllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.public static boolean isAvailable(String name)+
name
- the name to checkpublic static boolean isAvailable(HmacAlgorithms name)+
name
- the name to check@Deprecated +public static Mac getHmacMd5(byte[] key)+
getInitializedMac(HmacAlgorithms.HMAC_MD5, byte[])
Mac
for the HmacMD5 algorithm.
+ + Every implementation of the Java platform is required to support this standard Mac algorithm. +
key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
@Deprecated +public static Mac getHmacSha1(byte[] key)+
getInitializedMac(HmacAlgorithms.HMAC_SHA_1, byte[])
Mac
for the HmacSHA1 algorithm.
+ + Every implementation of the Java platform is required to support this standard Mac algorithm. +
key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
@Deprecated +public static Mac getHmacSha256(byte[] key)+
getInitializedMac(HmacAlgorithms.HMAC_SHA_256, byte[])
Mac
for the HmacSHA256 algorithm.
+ + Every implementation of the Java platform is required to support this standard Mac algorithm. +
key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
@Deprecated +public static Mac getHmacSha384(byte[] key)+
getInitializedMac(HmacAlgorithms.HMAC_SHA_384, byte[])
Mac
for the HmacSHA384 algorithm.
+ + Not every implementation of the Java platform is required to support this Mac algorithm. +
key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
@Deprecated +public static Mac getHmacSha512(byte[] key)+
getInitializedMac(HmacAlgorithms.HMAC_SHA_512, byte[])
Mac
for the HmacSHA512 algorithm.
+ + Not every implementation of the Java platform is required to support this Mac algorithm. +
key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
public static Mac getInitializedMac(HmacAlgorithms algorithm, + byte[] key)+
Mac
for the given algorithm
.algorithm
- the name of the algorithm requested. See
+ Appendix A in the Java Cryptography Architecture Reference Guide for information about standard
+ algorithm names.key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
public static Mac getInitializedMac(String algorithm, + byte[] key)+
Mac
for the given algorithm
.algorithm
- the name of the algorithm requested. See
+ Appendix A in the Java Cryptography Architecture Reference Guide for information about standard
+ algorithm names.key
- The key for the keyed digest (must not be null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.Mac.getInstance(String)
,
+Mac.init(Key)
@Deprecated +public static byte[] hmacMd5(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacMd5(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacMd5(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmac(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacMd5Hex(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacMd5Hex(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacMd5Hex(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmacHex(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha1(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha1(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha1(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmac(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha1Hex(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha1Hex(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha1Hex(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmacHex(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha256(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha256(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha256(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmac(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha256Hex(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha256Hex(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha256Hex(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmacHex(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha384(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha384(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha384(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmac(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha384Hex(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha384Hex(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha384Hex(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmacHex(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha512(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha512(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static byte[] hmacSha512(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmac(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha512Hex(byte[] key, + byte[] valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(byte[])
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha512Hex(byte[] key, + InputStream valueToDigest) + throws IOException+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(InputStream)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.@Deprecated +public static String hmacSha512Hex(String key, + String valueToDigest)+
new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmacHex(String)
key
- The key for the keyed digest (must not be null)valueToDigest
- The value (data) which should be digested (may be empty or null)IllegalArgumentException
- when a NoSuchAlgorithmException
is caught or key is null or key is invalid.public static Mac updateHmac(Mac mac, + byte[] valueToDigest)+
Mac
with the value.mac
- the initialized Mac
to updatevalueToDigest
- the value to update the Mac
with (may be null or empty)Mac
IllegalStateException
- if the Mac was not initializedpublic static Mac updateHmac(Mac mac, + InputStream valueToDigest) + throws IOException+
Mac
with the value.mac
- the initialized Mac
to updatevalueToDigest
- the value to update the Mac
with
+ + The InputStream must not be null and will not be closed +
Mac
IOException
- If an I/O error occurs.IllegalStateException
- If the Mac was not initializedpublic static Mac updateHmac(Mac mac, + String valueToDigest)+
Mac
with the value.mac
- the initialized Mac
to updatevalueToDigest
- the value to update the Mac
with (may be null or empty)Mac
IllegalStateException
- if the Mac was not initializedpublic byte[] hmac(byte[] valueToDigest)+
valueToDigest
- the input to usepublic String hmacHex(byte[] valueToDigest)+
valueToDigest
- the input to usepublic byte[] hmac(String valueToDigest)+
valueToDigest
- the input to use, treated as UTF-8public String hmacHex(String valueToDigest)+
valueToDigest
- the input to use, treated as UTF-8public byte[] hmac(ByteBuffer valueToDigest)+
valueToDigest
- the input to usepublic String hmacHex(ByteBuffer valueToDigest)+
valueToDigest
- the input to usepublic byte[] hmac(InputStream valueToDigest) + throws IOException+
valueToDigest
- the data to use
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.public String hmacHex(InputStream valueToDigest) + throws IOException+
valueToDigest
- the data to use
+ + The InputStream must not be null and will not be closed +
IOException
- If an I/O error occurs.public byte[] hmac(File valueToDigest) + throws IOException+
valueToDigest
- the file to useIOException
- If an I/O error occurs.public String hmacHex(File valueToDigest) + throws IOException+
valueToDigest
- the file to useIOException
- If an I/O error occurs.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Md5Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Md5Crypt.html new file mode 100644 index 0000000..b6fc536 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Md5Crypt.html @@ -0,0 +1,620 @@ + + + + + + +public class Md5Crypt +extends Object+
+ Based on the public domain ("beer-ware") C implementation from Poul-Henning Kamp which was found at:
+ crypt-md5.c @ freebsd.org
+
+ Source: + +
+ $FreeBSD: src/lib/libcrypt/crypt-md5.c,v 1.1 1999/01/21 13:50:09 brandon Exp $ ++
+ Conversion to Kotlin and from there to Java in 2012. +
+ The C style comments are from the original C code, the ones with "//" from the port. +
+ This class is immutable and thread-safe.
Modifier and Type | +Method and Description | +
---|---|
static String |
+apr1Crypt(byte[] keyBytes)
+See
+apr1Crypt(byte[], String) for details. |
+
static String |
+apr1Crypt(byte[] keyBytes,
+ Random random)
+See
+apr1Crypt(byte[], String) for details. |
+
static String |
+apr1Crypt(byte[] keyBytes,
+ String salt)
+See
+apr1Crypt(String, String) for details. |
+
static String |
+apr1Crypt(String keyBytes)
+See
+apr1Crypt(String, String) for details. |
+
static String |
+apr1Crypt(String keyBytes,
+ String salt)
+Generates an Apache htpasswd compatible "$apr1$" MD5 based hash value.
+ |
+
static String |
+md5Crypt(byte[] keyBytes)
+Generates a libc6 crypt() compatible "$1$" hash value.
+ |
+
static String |
+md5Crypt(byte[] keyBytes,
+ Random random)
+Generates a libc6 crypt() compatible "$1$" hash value.
+ |
+
static String |
+md5Crypt(byte[] keyBytes,
+ String salt)
+Generates a libc crypt() compatible "$1$" MD5 based hash value.
+ |
+
static String |
+md5Crypt(byte[] keyBytes,
+ String salt,
+ String prefix)
+Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value.
+ |
+
static String |
+md5Crypt(byte[] keyBytes,
+ String salt,
+ String prefix,
+ Random random)
+Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value.
+ |
+
public static String apr1Crypt(byte[] keyBytes)+
apr1Crypt(byte[], String)
for details.
+
+ A salt is generated for you using SecureRandom
; you can pass your own Random
in
+ apr1Crypt(byte[], Random)
.
+
keyBytes
- plaintext string to hash.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.apr1Crypt(byte[], String)
public static String apr1Crypt(byte[] keyBytes, + Random random)+
apr1Crypt(byte[], String)
for details.
+
+ A salt is generated for you using the user provided Random
.
+
keyBytes
- plaintext string to hash.random
- an arbitrary Random
for the user's reason.random
- the instance of Random
to use for generating the salt. Consider using SecureRandom
+ or ThreadLocalRandom
.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.apr1Crypt(byte[], String)
public static String apr1Crypt(byte[] keyBytes, + String salt)+
apr1Crypt(String, String)
for details.
+
+ A salt is generated for you using SecureRandom
+
keyBytes
- plaintext string to hash.salt
- An APR1 salt. The salt may be null, in which case a salt is generated for you using
+ ThreadLocalRandom
; for more secure salts consider using SecureRandom
to generate your
+ own salts.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String apr1Crypt(String keyBytes)+
apr1Crypt(String, String)
for details.
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling apr1Crypt(byte[], String)
.
+
keyBytes
- plaintext string to hash.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.apr1Crypt(byte[], String)
public static String apr1Crypt(String keyBytes, + String salt)+
+ The algorithm is identical to the crypt(3) "$1$" one but produces different outputs due to the different salt + prefix.
keyBytes
- plaintext string to hash.salt
- salt string including the prefix and optionally garbage at the end. The salt may be null, in which
+ case a salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String md5Crypt(byte[] keyBytes)+
+ See md5Crypt(byte[], String)
for details.
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling md5Crypt(byte[], String)
.
+
keyBytes
- plaintext string to hash.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.md5Crypt(byte[], String)
public static String md5Crypt(byte[] keyBytes, + Random random)+
+ See md5Crypt(byte[], String)
for details.
+
+ A salt is generated for you using the instance of Random
you supply.
+
keyBytes
- plaintext string to hash.random
- the instance of Random
to use for generating the salt. Consider using SecureRandom
+ or ThreadLocalRandom
.IllegalArgumentException
- when a NoSuchAlgorithmException
is caught.md5Crypt(byte[], String)
public static String md5Crypt(byte[] keyBytes, + String salt)+
+ See Crypt.crypt(String, String)
for details. We use SecureRandom
for seed generation by
+ default.
+
keyBytes
- plaintext string to hash.salt
- salt string including the prefix and optionally garbage at the end. The salt may be null, in which
+ case a salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String md5Crypt(byte[] keyBytes, + String salt, + String prefix)+
+ See Crypt.crypt(String, String)
or apr1Crypt(String, String)
for details. We use
SecureRandom by default
.
+
keyBytes
- plaintext string to hash.salt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using ThreadLocalRandom
; for more secure salts consider using SecureRandom
to
+ generate your own salts.prefix
- salt prefixIllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String md5Crypt(byte[] keyBytes, + String salt, + String prefix, + Random random)+
+ See Crypt.crypt(String, String)
or apr1Crypt(String, String)
for details.
+
keyBytes
- plaintext string to hash.salt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using ThreadLocalRandom
; for more secure salts consider using SecureRandom
to
+ generate your own salts.prefix
- salt prefixrandom
- the instance of Random
to use for generating the salt. Consider using SecureRandom
+ or ThreadLocalRandom
.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/MessageDigestAlgorithms.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/MessageDigestAlgorithms.html new file mode 100644 index 0000000..6a06130 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/MessageDigestAlgorithms.html @@ -0,0 +1,543 @@ + + + + + + +public class MessageDigestAlgorithms +extends Object+
MessageDigest
algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation.
+ + This class is immutable and thread-safe. +
++ Java 8 and up: SHA-224. +
++ Java 9 and up: SHA3-224, SHA3-256, SHA3-384, SHA3-512. +
Modifier and Type | +Field and Description | +
---|---|
static String |
+MD2
+The MD2 message digest algorithm defined in RFC 1319.
+ |
+
static String |
+MD5
+The MD5 message digest algorithm defined in RFC 1321.
+ |
+
static String |
+SHA_1
+The SHA-1 hash algorithm defined in the FIPS PUB 180-2.
+ |
+
static String |
+SHA_224
+The SHA-224 hash algorithm defined in the FIPS PUB 180-3.
+ |
+
static String |
+SHA_256
+The SHA-256 hash algorithm defined in the FIPS PUB 180-2.
+ |
+
static String |
+SHA_384
+The SHA-384 hash algorithm defined in the FIPS PUB 180-2.
+ |
+
static String |
+SHA_512
+The SHA-512 hash algorithm defined in the FIPS PUB 180-2.
+ |
+
static String |
+SHA3_224
+The SHA3-224 hash algorithm defined in the FIPS PUB 202.
+ |
+
static String |
+SHA3_256
+The SHA3-256 hash algorithm defined in the FIPS PUB 202.
+ |
+
static String |
+SHA3_384
+The SHA3-384 hash algorithm defined in the FIPS PUB 202.
+ |
+
static String |
+SHA3_512
+The SHA3-512 hash algorithm defined in the FIPS PUB 202.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static String[] |
+values()
+Gets all constant values defined in this class.
+ |
+
public static final String MD2+
public static final String MD5+
public static final String SHA_1+
public static final String SHA_224+
+ Present in Oracle Java 8. +
public static final String SHA_256+
public static final String SHA_384+
public static final String SHA_512+
public static final String SHA3_224+
+ Included starting in Oracle Java 9 GA. +
public static final String SHA3_256+
+ Included starting in Oracle Java 9 GA. +
public static final String SHA3_384+
+ Included starting in Oracle Java 9 GA. +
public static final String SHA3_512+
+ Included starting in Oracle Java 9 GA. +
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32.html new file mode 100644 index 0000000..9dced8a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32.html @@ -0,0 +1,361 @@ + + + + + + +public class PureJavaCrc32 +extends Object +implements Checksum+
+ This class is Not ThreadSafe
CRC32
Constructor and Description | +
---|
PureJavaCrc32()
+Create a new PureJavaCrc32 object.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
long |
+getValue() |
+
void |
+reset() |
+
void |
+update(byte[] b,
+ int offset,
+ int len) |
+
void |
+update(int b) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32C.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32C.html new file mode 100644 index 0000000..d1e7f01 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/PureJavaCrc32C.html @@ -0,0 +1,353 @@ + + + + + + +public class PureJavaCrc32C +extends Object +implements Checksum+
+ This class is Not ThreadSafe
Constructor and Description | +
---|
PureJavaCrc32C()
+Create a new PureJavaCrc32 object.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
long |
+getValue() |
+
void |
+reset() |
+
void |
+update(byte[] b,
+ int off,
+ int len) |
+
void |
+update(int b) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Sha2Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Sha2Crypt.html new file mode 100644 index 0000000..61738df --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/Sha2Crypt.html @@ -0,0 +1,475 @@ + + + + + + +public class Sha2Crypt +extends Object+
+ Based on the C implementation released into the Public Domain by Ulrich Drepper <drepper@redhat.com> + http://www.akkadia.org/drepper/SHA-crypt.txt +
+ Conversion to Kotlin and from there to Java in 2012 by Christian Hammers <ch@lathspell.de> and likewise put + into the Public Domain. +
+ This class is immutable and thread-safe.
Modifier and Type | +Method and Description | +
---|---|
static String |
+sha256Crypt(byte[] keyBytes)
+Generates a libc crypt() compatible "$5$" hash value with random salt.
+ |
+
static String |
+sha256Crypt(byte[] keyBytes,
+ String salt)
+Generates a libc6 crypt() compatible "$5$" hash value.
+ |
+
static String |
+sha256Crypt(byte[] keyBytes,
+ String salt,
+ Random random)
+Generates a libc6 crypt() compatible "$5$" hash value.
+ |
+
static String |
+sha512Crypt(byte[] keyBytes)
+Generates a libc crypt() compatible "$6$" hash value with random salt.
+ |
+
static String |
+sha512Crypt(byte[] keyBytes,
+ String salt)
+Generates a libc6 crypt() compatible "$6$" hash value.
+ |
+
static String |
+sha512Crypt(byte[] keyBytes,
+ String salt,
+ Random random)
+Generates a libc6 crypt() compatible "$6$" hash value.
+ |
+
public static String sha256Crypt(byte[] keyBytes)+
+ See Crypt.crypt(String, String)
for details.
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling sha256Crypt(byte[], String)
.
+
keyBytes
- plaintext to hashIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String sha256Crypt(byte[] keyBytes, + String salt)+
+ See Crypt.crypt(String, String)
for details.
+
keyBytes
- plaintext to hashsalt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using SecureRandom
. If one does not want to use SecureRandom
, you can pass your
+ own Random
in sha256Crypt(byte[], String, Random)
.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String sha256Crypt(byte[] keyBytes, + String salt, + Random random)+
+ See Crypt.crypt(String, String)
for details.
+
keyBytes
- plaintext to hashsalt
- real salt value without prefix or "rounds=".random
- the instance of Random
to use for generating the salt. Consider using SecureRandom
+ or ThreadLocalRandom
.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String sha512Crypt(byte[] keyBytes)+
+ See Crypt.crypt(String, String)
for details.
+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling sha512Crypt(byte[], String)
.
+
keyBytes
- plaintext to hashIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String sha512Crypt(byte[] keyBytes, + String salt)+
+ See Crypt.crypt(String, String)
for details.
+
keyBytes
- plaintext to hashsalt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated
+ for you using SecureRandom
; if you want to use a Random
object other than
+ SecureRandom
then we suggest you provide it using
+ sha512Crypt(byte[], String, Random)
.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.public static String sha512Crypt(byte[] keyBytes, + String salt, + Random random)+
+ See Crypt.crypt(String, String)
for details.
+
keyBytes
- plaintext to hashsalt
- real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for
+ you using ThreadLocalRandom
; for more secure salts consider using SecureRandom
to
+ generate your own salts.random
- the instance of Random
to use for generating the salt. Consider using SecureRandom
+ or ThreadLocalRandom
.IllegalArgumentException
- if the salt does not match the allowed patternIllegalArgumentException
- when a NoSuchAlgorithmException
is caught.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/UnixCrypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/UnixCrypt.html new file mode 100644 index 0000000..09f1060 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/UnixCrypt.html @@ -0,0 +1,390 @@ + + + + + + +public class UnixCrypt +extends Object+
+ This class only implements the traditional 56 bit DES based algorithm. Please use DigestUtils.crypt() for a method + that distinguishes between all the algorithms supported in the current glibc's crypt(). +
+ The Java implementation was taken from the JetSpeed Portal project (see + org.apache.jetspeed.services.security.ldap.UnixCrypt). +
+ This class is slightly incompatible if the given salt contains characters that are not part of the allowed range + [a-zA-Z0-9./]. +
+ This class is immutable and thread-safe.
Modifier and Type | +Method and Description | +
---|---|
static String |
+crypt(byte[] original)
+Generates a crypt(3) compatible hash using the DES algorithm.
+ |
+
static String |
+crypt(byte[] original,
+ String salt)
+Generates a crypt(3) compatible hash using the DES algorithm.
+ |
+
static String |
+crypt(String original)
+Generates a crypt(3) compatible hash using the DES algorithm.
+ |
+
static String |
+crypt(String original,
+ String salt)
+Generates a crypt(3) compatible hash using the DES algorithm.
+ |
+
public static String crypt(byte[] original)+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling crypt(byte[], String)
.
+
original
- plaintext passwordpublic static String crypt(byte[] original, + String salt)+
+ Using unspecified characters as salt results in incompatible hash values. +
original
- plaintext passwordsalt
- a two character string drawn from [a-zA-Z0-9./]. The salt may be null, in which case a salt is
+ generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternpublic static String crypt(String original)+
+ A salt is generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts and calling crypt(String, String)
.
+
original
- plaintext passwordpublic static String crypt(String original, + String salt)+
original
- plaintext passwordsalt
- a two character string drawn from [a-zA-Z0-9./]. The salt may be null, in which case a salt is
+ generated for you using ThreadLocalRandom
; for more secure salts consider using
+ SecureRandom
to generate your own salts.IllegalArgumentException
- if the salt does not match the allowed patternCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/XXHash32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/XXHash32.html new file mode 100644 index 0000000..042c600 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/XXHash32.html @@ -0,0 +1,369 @@ + + + + + + +public class XXHash32 +extends Object +implements Checksum+
Copied from Commons Compress 1.14 + https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD
+NotThreadSafe
Constructor and Description | +
---|
XXHash32()
+Creates an XXHash32 instance with a seed of 0.
+ |
+
XXHash32(int seed)
+Creates an XXHash32 instance.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
long |
+getValue() |
+
void |
+reset() |
+
void |
+update(byte[] b,
+ int off,
+ int len) |
+
void |
+update(int b) |
+
public XXHash32()+
public XXHash32(int seed)+
seed
- the seed to useCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Crypt.html new file mode 100644 index 0000000..24b8018 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Crypt.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/DigestUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/DigestUtils.html new file mode 100644 index 0000000..4c3083e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/DigestUtils.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacAlgorithms.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacAlgorithms.html new file mode 100644 index 0000000..59a18aa --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacAlgorithms.html @@ -0,0 +1,221 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.digest | +
+ Simplifies common
+MessageDigest tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant. |
+
Modifier and Type | +Method and Description | +
---|---|
static HmacAlgorithms |
+HmacAlgorithms.valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static HmacAlgorithms[] |
+HmacAlgorithms.values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Mac |
+HmacUtils.getInitializedMac(HmacAlgorithms algorithm,
+ byte[] key)
+Returns an initialized
+Mac for the given algorithm . |
+
static boolean |
+HmacUtils.isAvailable(HmacAlgorithms name)
+Returns whether this algorithm is available
+ |
+
Constructor and Description | +
---|
HmacUtils(HmacAlgorithms algorithm,
+ byte[] key)
+Creates an instance using the provided algorithm type.
+ |
+
HmacUtils(HmacAlgorithms algorithm,
+ String key)
+Creates an instance using the provided algorithm type.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacUtils.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacUtils.html new file mode 100644 index 0000000..d1290e3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/HmacUtils.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Md5Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Md5Crypt.html new file mode 100644 index 0000000..7e9edf0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Md5Crypt.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/MessageDigestAlgorithms.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/MessageDigestAlgorithms.html new file mode 100644 index 0000000..54810d7 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/MessageDigestAlgorithms.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32.html new file mode 100644 index 0000000..bf21d5e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32C.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32C.html new file mode 100644 index 0000000..5b677aa --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/PureJavaCrc32C.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Sha2Crypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Sha2Crypt.html new file mode 100644 index 0000000..4934ecf --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/Sha2Crypt.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/UnixCrypt.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/UnixCrypt.html new file mode 100644 index 0000000..bbd997f --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/UnixCrypt.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/XXHash32.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/XXHash32.html new file mode 100644 index 0000000..dd0cda6 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/class-use/XXHash32.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-frame.html new file mode 100644 index 0000000..8213c5c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-frame.html @@ -0,0 +1,33 @@ + + + + + + +MessageDigest
tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant.See: Description
+Class | +Description | +
---|---|
Crypt | +
+ GNU libc crypt(3) compatible hash method.
+ |
+
DigestUtils | +
+ Operations to simplify common
+MessageDigest tasks. |
+
HmacUtils | +
+ Simplifies common
+Mac tasks. |
+
Md5Crypt | +
+ The libc crypt() "$1$" and Apache "$apr1$" MD5-based hash algorithm.
+ |
+
MessageDigestAlgorithms | +
+ Standard
+MessageDigest algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation. |
+
PureJavaCrc32 | +
+ A pure-java implementation of the CRC32 checksum that uses
+ the same polynomial as the built-in native CRC32.
+ |
+
PureJavaCrc32C | +
+ A pure-java implementation of the CRC32 checksum that uses
+ the CRC32-C polynomial, the same polynomial used by iSCSI
+ and implemented on many Intel chipsets supporting SSE4.2.
+ |
+
Sha2Crypt | +
+ SHA2-based Unix crypt implementation.
+ |
+
UnixCrypt | +
+ Unix crypt(3) algorithm implementation.
+ |
+
XXHash32 | +
+ Implementation of the xxhash32 hash algorithm.
+ |
+
Enum | +Description | +
---|---|
HmacAlgorithms | +
+ Standard
+HmacUtils algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation. |
+
MessageDigest
tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-tree.html new file mode 100644 index 0000000..92f4628 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-tree.html @@ -0,0 +1,159 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-use.html new file mode 100644 index 0000000..0a95073 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/digest/package-use.html @@ -0,0 +1,166 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.digest | +
+ Simplifies common
+MessageDigest tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant. |
+
Class and Description | +
---|
HmacAlgorithms
+ Standard
+HmacUtils algorithm names from the Java Cryptography Architecture Standard Algorithm Name
+ Documentation. |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/AbstractCaverphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/AbstractCaverphone.html new file mode 100644 index 0000000..916cf5d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/AbstractCaverphone.html @@ -0,0 +1,351 @@ + + + + + + +public abstract class AbstractCaverphone +extends Object +implements StringEncoder+
This class is immutable and thread-safe.
Constructor and Description | +
---|
AbstractCaverphone()
+Creates an instance of the Caverphone encoder
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object source)
+Encodes an Object using the caverphone algorithm.
+ |
+
boolean |
+isEncodeEqual(String str1,
+ String str2)
+Tests if the encodings of two strings are equal.
+ |
+
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
encode
public AbstractCaverphone()+
public Object encode(Object source) + throws EncoderException+
encode
in interface Encoder
source
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.Stringpublic boolean isEncodeEqual(String str1, + String str2) + throws EncoderException+
str1
- First of two strings to comparestr2
- Second of two strings to comparetrue
if the encodings of these strings are identical, false
otherwise.EncoderException
- thrown if there is an error condition during the encoding process.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone.html new file mode 100644 index 0000000..1ed75c2 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone.html @@ -0,0 +1,394 @@ + + + + + + +Caverphone2
, will be removed in 2.0.@Deprecated +public class Caverphone +extends Object +implements StringEncoder+
Caverphone2
instance.
+
+ This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0
+ algorithm:Constructor and Description | +
---|
Caverphone()
+Deprecated.
+Creates an instance of the Caverphone encoder
+ |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+caverphone(String source)
+Deprecated.
+Encodes the given String into a Caverphone value.
+ |
+
Object |
+encode(Object obj)
+Deprecated.
+Encodes an Object using the caverphone algorithm.
+ |
+
String |
+encode(String str)
+Deprecated.
+Encodes a String using the Caverphone algorithm.
+ |
+
boolean |
+isCaverphoneEqual(String str1,
+ String str2)
+Deprecated.
+Tests if the caverphones of two strings are identical.
+ |
+
public Caverphone()+
public String caverphone(String source)+
source
- String the source stringpublic Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.Stringpublic String encode(String str)+
encode
in interface StringEncoder
str
- String object to encodepublic boolean isCaverphoneEqual(String str1, + String str2)+
str1
- First of two strings to comparestr2
- Second of two strings to comparetrue
if the caverphones of these strings are identical, false
otherwise.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone1.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone1.html new file mode 100644 index 0000000..196b1c9 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone1.html @@ -0,0 +1,312 @@ + + + + + + +public class Caverphone1 +extends AbstractCaverphone+
This class is immutable and thread-safe.
Constructor and Description | +
---|
Caverphone1() |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+encode(String source)
+Encodes the given String into a Caverphone value.
+ |
+
encode, isEncodeEqual
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone2.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone2.html new file mode 100644 index 0000000..99128b9 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Caverphone2.html @@ -0,0 +1,312 @@ + + + + + + +public class Caverphone2 +extends AbstractCaverphone+
This class is immutable and thread-safe.
Constructor and Description | +
---|
Caverphone2() |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+encode(String source)
+Encodes the given String into a Caverphone 2.0 value.
+ |
+
encode, isEncodeEqual
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/ColognePhonetic.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/ColognePhonetic.html new file mode 100644 index 0000000..49389e7 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/ColognePhonetic.html @@ -0,0 +1,520 @@ + + + + + + +public class ColognePhonetic +extends Object +implements StringEncoder+
+ Implements the Kölner Phonetik (Cologne + Phonetic) algorithm issued by Hans Joachim Postel in 1969. +
++ The Kölner Phonetik is a phonetic algorithm which is optimized for the German language. It is related to + the well-known soundex algorithm. +
+ +Letter | +Context | +Code | +
---|---|---|
A, E, I, J, O, U, Y | ++ | 0 | +
H | ++ | - | +
B | ++ | 1 | +
P | +not before H | + +|
D, T | +not before C, S, Z | +2 | +
F, V, W | ++ | 3 | +
P | +before H | +|
G, K, Q | ++ | 4 | +
C | +at onset before A, H, K, L, O, Q, R, U, X | + +|
before A, H, K, O, Q, U, X except after S, Z | +||
X | +not after C, K, Q | +48 | +
L | ++ + | 5 | +
M, N | ++ | 6 | +
R | ++ | 7 | +
S, Z | ++ | 8 | +
C | +after S, Z | +|
at onset except before A, H, K, L, O, Q, R, U, X | +||
not before A, H, K, O, Q, U, X | +||
D, T | +before C, S, Z | +|
X | +after C, K, Q | +
"M
üller-L
üdenscheidt"
+ => "MULLERLUDENSCHEIDT" => "6005507500206880022"
+
+ "6005507500206880022" => "6050750206802"
"6050750206802" => "65752682"
+ This class is thread-safe. +
Constructor and Description | +
---|
ColognePhonetic() |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+colognePhonetic(String text)
+
+ Implements the Kölner Phonetik algorithm.
+ |
+
Object |
+encode(Object object)
+Encodes an "Object" and returns the encoded content as an Object.
+ |
+
String |
+encode(String text)
+Encodes a String and returns a String.
+ |
+
boolean |
+isEncodeEqual(String text1,
+ String text2) |
+
public String colognePhonetic(String text)+
+ Implements the Kölner Phonetik algorithm. +
++ In contrast to the initial description of the algorithm, this implementation does the encoding in one pass. +
text
- The source text to encodepublic Object encode(Object object) + throws EncoderException+
Encoder
byte[]
or String
s depending on the implementation used.encode
in interface Encoder
object
- An object to encodeEncoderException
- An encoder exception is thrown if the encoder experiences a failure condition during the encoding
+ process.public String encode(String text)+
StringEncoder
encode
in interface StringEncoder
text
- the String to encodeCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DaitchMokotoffSoundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DaitchMokotoffSoundex.html new file mode 100644 index 0000000..106b318 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DaitchMokotoffSoundex.html @@ -0,0 +1,429 @@ + + + + + + +public class DaitchMokotoffSoundex +extends Object +implements StringEncoder+
+ The Daitch-Mokotoff Soundex algorithm is a refinement of the Russell and American Soundex algorithms, yielding greater + accuracy in matching especially Slavic and Yiddish surnames with similar pronunciation but differences in spelling. +
++ The main differences compared to the other soundex variants are: +
++ This implementation supports branching, depending on the used method: +
encode(String)
- branching disabled, only the first code will be returned
+ soundex(String)
- branching enabled, all codes will be returned, separated by '|'
+
+ Note: this implementation has additional branching rules compared to the original description of the algorithm. The
+ rules can be customized by overriding the default rules contained in the resource file
+ org/apache/commons/codec/language/dmrules.txt
.
+
+ This class is thread-safe. +
Soundex
,
+ Wikipedia - Daitch-Mokotoff Soundex,
+Avotaynu - Soundexing and GenealogyConstructor and Description | +
---|
DaitchMokotoffSoundex()
+Creates a new instance with ASCII-folding enabled.
+ |
+
DaitchMokotoffSoundex(boolean folding)
+Creates a new instance.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object obj)
+Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching.
+ |
+
String |
+encode(String source)
+Encodes a String using the Daitch-Mokotoff soundex algorithm without branching.
+ |
+
String |
+soundex(String source)
+Encodes a String using the Daitch-Mokotoff soundex algorithm with branching.
+ |
+
public DaitchMokotoffSoundex()+
public DaitchMokotoffSoundex(boolean folding)+
+ With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g. + è -> e. +
folding
- if ASCII-folding shall be performed before encodingpublic Object encode(Object obj) + throws EncoderException+
+ This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an + EncoderException if the supplied object is not of type java.lang.String. +
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.StringIllegalArgumentException
- if a character is not mappedsoundex(String)
public String encode(String source)+
encode
in interface StringEncoder
source
- A String object to encodeIllegalArgumentException
- if a character is not mappedsoundex(String)
public String soundex(String source)+
+ In case a string is encoded into multiple codes (see branching rules), the result will contain all codes, + separated by '|'. +
++ Example: the name "AUERBACH" is encoded as both +
++ Thus the result will be "097400|097500". +
source
- A String object to encodeIllegalArgumentException
- if a character is not mappedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.DoubleMetaphoneResult.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.DoubleMetaphoneResult.html new file mode 100644 index 0000000..b823a1f --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.DoubleMetaphoneResult.html @@ -0,0 +1,411 @@ + + + + + + +public class DoubleMetaphone.DoubleMetaphoneResult +extends Object+
Constructor and Description | +
---|
DoubleMetaphoneResult(int maxLength) |
+
Modifier and Type | +Method and Description | +
---|---|
void |
+append(char value) |
+
void |
+append(char primary,
+ char alternate) |
+
void |
+append(String value) |
+
void |
+append(String primary,
+ String alternate) |
+
void |
+appendAlternate(char value) |
+
void |
+appendAlternate(String value) |
+
void |
+appendPrimary(char value) |
+
void |
+appendPrimary(String value) |
+
String |
+getAlternate() |
+
String |
+getPrimary() |
+
boolean |
+isComplete() |
+
public DoubleMetaphoneResult(int maxLength)+
public void append(char value)+
public void append(char primary, + char alternate)+
public void appendPrimary(char value)+
public void appendAlternate(char value)+
public void append(String value)+
public void appendPrimary(String value)+
public void appendAlternate(String value)+
public String getPrimary()+
public String getAlternate()+
public boolean isComplete()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.html new file mode 100644 index 0000000..be9f6dc --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/DoubleMetaphone.html @@ -0,0 +1,536 @@ + + + + + + +public class DoubleMetaphone +extends Object +implements StringEncoder+
+ This class is conditionally thread-safe. The instance field maxCodeLen
is mutable
+ setMaxCodeLen(int)
but is not volatile, and accesses are not synchronized. If an instance of the class is
+ shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
+ of the value between threads, and must not invoke setMaxCodeLen(int)
after initial setup.
Modifier and Type | +Class and Description | +
---|---|
class |
+DoubleMetaphone.DoubleMetaphoneResult
+Inner class for storing results, since there is the optional alternate encoding.
+ |
+
Constructor and Description | +
---|
DoubleMetaphone()
+Creates an instance of this DoubleMetaphone encoder
+ |
+
Modifier and Type | +Method and Description | +
---|---|
protected char |
+charAt(String value,
+ int index) |
+
protected static boolean |
+contains(String value,
+ int start,
+ int length,
+ String... criteria) |
+
String |
+doubleMetaphone(String value)
+Encode a value with Double Metaphone.
+ |
+
String |
+doubleMetaphone(String value,
+ boolean alternate)
+Encode a value with Double Metaphone, optionally using the alternate encoding.
+ |
+
Object |
+encode(Object obj)
+Encode the value using DoubleMetaphone.
+ |
+
String |
+encode(String value)
+Encode the value using DoubleMetaphone.
+ |
+
int |
+getMaxCodeLen()
+Returns the maxCodeLen.
+ |
+
boolean |
+isDoubleMetaphoneEqual(String value1,
+ String value2)
+Check if the Double Metaphone values of two
+String values
+ are equal. |
+
boolean |
+isDoubleMetaphoneEqual(String value1,
+ String value2,
+ boolean alternate)
+Check if the Double Metaphone values of two
+String values
+ are equal, optionally using the alternate value. |
+
void |
+setMaxCodeLen(int maxCodeLen)
+Sets the maxCodeLen.
+ |
+
public DoubleMetaphone()+
public String doubleMetaphone(String value)+
value
- String to encodepublic String doubleMetaphone(String value, + boolean alternate)+
value
- String to encodealternate
- use alternate encodepublic Object encode(Object obj) + throws EncoderException+
obj
is a String
(like Metaphone
).encode
in interface Encoder
obj
- Object to encode (should be of type String)EncoderException
- encode parameter is not of type Stringpublic String encode(String value)+
encode
in interface StringEncoder
value
- String to encodepublic boolean isDoubleMetaphoneEqual(String value1, + String value2)+
String
values
+ are equal.value1
- The left-hand side of the encoded String.equals(Object)
.value2
- The right-hand side of the encoded String.equals(Object)
.true
if the encoded String
s are equal;
+ false
otherwise.isDoubleMetaphoneEqual(String,String,boolean)
public boolean isDoubleMetaphoneEqual(String value1, + String value2, + boolean alternate)+
String
values
+ are equal, optionally using the alternate value.value1
- The left-hand side of the encoded String.equals(Object)
.value2
- The right-hand side of the encoded String.equals(Object)
.alternate
- use the alternate value if true
.true
if the encoded String
s are equal;
+ false
otherwise.public int getMaxCodeLen()+
public void setMaxCodeLen(int maxCodeLen)+
maxCodeLen
- The maxCodeLen to setprotected char charAt(String value, + int index)+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/MatchRatingApproachEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/MatchRatingApproachEncoder.html new file mode 100644 index 0000000..14f30df --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/MatchRatingApproachEncoder.html @@ -0,0 +1,352 @@ + + + + + + +public class MatchRatingApproachEncoder +extends Object +implements StringEncoder+
Constructor and Description | +
---|
MatchRatingApproachEncoder() |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object pObject)
+Encodes an Object using the Match Rating Approach algorithm.
+ |
+
String |
+encode(String name)
+Encodes a String using the Match Rating Approach (MRA) algorithm.
+ |
+
boolean |
+isEncodeEquals(String name1,
+ String name2)
+Determines if two names are homophonous via Match Rating Approach (MRA) algorithm.
+ |
+
public MatchRatingApproachEncoder()+
public final Object encode(Object pObject) + throws EncoderException+
encode
in interface Encoder
pObject
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.Stringpublic final String encode(String name)+
encode
in interface StringEncoder
name
- String object to encodepublic boolean isEncodeEquals(String name1, + String name2)+
encode(String)
.name1
- First of the 2 strings (names) to comparename2
- Second of the 2 names to comparetrue
if the encodings are identical false
otherwise.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Metaphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Metaphone.html new file mode 100644 index 0000000..44f2fd0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Metaphone.html @@ -0,0 +1,445 @@ + + + + + + +public class Metaphone +extends Object +implements StringEncoder+
+ Initial Java implementation by William B. Brogden. December, 1997. + Permission given by wbrogden for code to be used anywhere. +
+ Hanging on the Metaphone by Lawrence Philips in Computer Language of Dec. 1990, + p 39. +
+ Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations: +
++ They have had undocumented changes from the originally published algorithm. + For more information, see CODEC-57. +
+ This class is conditionally thread-safe.
+ The instance field maxCodeLen
is mutable setMaxCodeLen(int)
+ but is not volatile, and accesses are not synchronized.
+ If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
+ is used to ensure safe publication of the value between threads, and must not invoke setMaxCodeLen(int)
+ after initial setup.
Constructor and Description | +
---|
Metaphone()
+Creates an instance of the Metaphone encoder
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object obj)
+Encodes an Object using the metaphone algorithm.
+ |
+
String |
+encode(String str)
+Encodes a String using the Metaphone algorithm.
+ |
+
int |
+getMaxCodeLen()
+Returns the maxCodeLen.
+ |
+
boolean |
+isMetaphoneEqual(String str1,
+ String str2)
+Tests if the metaphones of two strings are identical.
+ |
+
String |
+metaphone(String txt)
+Find the metaphone value of a String.
+ |
+
void |
+setMaxCodeLen(int maxCodeLen)
+Sets the maxCodeLen.
+ |
+
public Metaphone()+
public String metaphone(String txt)+
txt
- String to find the metaphone code forpublic Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not
+ of type java.lang.Stringpublic String encode(String str)+
encode
in interface StringEncoder
str
- String object to encodepublic boolean isMetaphoneEqual(String str1, + String str2)+
str1
- First of two strings to comparestr2
- Second of two strings to comparetrue
if the metaphones of these strings are identical,
+ false
otherwise.public int getMaxCodeLen()+
public void setMaxCodeLen(int maxCodeLen)+
maxCodeLen
- The maxCodeLen to setCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Nysiis.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Nysiis.html new file mode 100644 index 0000000..5d9cb36 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Nysiis.html @@ -0,0 +1,443 @@ + + + + + + +public class Nysiis +extends Object +implements StringEncoder+
+ NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm. +
+ Algorithm description: +
+ 1. Transcode first characters of name + 1a. MAC -> MCC + 1b. KN -> NN + 1c. K -> C + 1d. PH -> FF + 1e. PF -> FF + 1f. SCH -> SSS + 2. Transcode last characters of name + 2a. EE, IE -> Y + 2b. DT,RT,RD,NT,ND -> D + 3. First character of key = first character of name + 4. Transcode remaining characters by following these rules, incrementing by one character each time + 4a. EV -> AF else A,E,I,O,U -> A + 4b. Q -> G + 4c. Z -> S + 4d. M -> N + 4e. KN -> N else K -> C + 4f. SCH -> SSS + 4g. PH -> FF + 4h. H -> If previous or next is nonvowel, previous + 4i. W -> If previous is vowel, previous + 4j. Add current to key if current != last key character + 5. If last character is S, remove it + 6. If last characters are AY, replace with Y + 7. If last character is A, remove it + 8. Collapse all strings of repeated characters + 9. Add original first character of name as first character of key ++
+ This class is immutable and thread-safe.
Soundex
Constructor and Description | +
---|
Nysiis()
+Creates an instance of the
+Nysiis encoder with strict mode (original form),
+ i.e. |
+
Nysiis(boolean strict)
+Create an instance of the
+Nysiis encoder with the specified strict mode:
+
+
+ true : encoded strings have a maximum length of 6
+ false : encoded strings may have arbitrary length
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object obj)
+Encodes an Object using the NYSIIS algorithm.
+ |
+
String |
+encode(String str)
+Encodes a String using the NYSIIS algorithm.
+ |
+
boolean |
+isStrict()
+Indicates the strict mode for this
+Nysiis encoder. |
+
String |
+nysiis(String str)
+Retrieves the NYSIIS code for a given String object.
+ |
+
public Nysiis()+
Nysiis
encoder with strict mode (original form),
+ i.e. encoded strings have a maximum length of 6.public Nysiis(boolean strict)+
Nysiis
encoder with the specified strict mode:
+
+ true
: encoded strings have a maximum length of 6false
: encoded strings may have arbitrary lengthstrict
- the strict modepublic Object encode(Object obj) + throws EncoderException+
EncoderException
if the supplied object is not of type
+ String
.encode
in interface Encoder
obj
- Object to encodeString
) containing the NYSIIS code which corresponds to the given String.EncoderException
- if the parameter supplied is not of a String
IllegalArgumentException
- if a character is not mappedpublic String encode(String str)+
encode
in interface StringEncoder
str
- A String object to encodeIllegalArgumentException
- if a character is not mappedpublic boolean isStrict()+
Nysiis
encoder.true
if the encoder is configured for strict mode, false
otherwiseCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/RefinedSoundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/RefinedSoundex.html new file mode 100644 index 0000000..f14e3e3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/RefinedSoundex.html @@ -0,0 +1,514 @@ + + + + + + +public class RefinedSoundex +extends Object +implements StringEncoder+
This class is immutable and thread-safe.
Modifier and Type | +Field and Description | +
---|---|
static RefinedSoundex |
+US_ENGLISH
+This static variable contains an instance of the RefinedSoundex using
+ the US_ENGLISH mapping.
+ |
+
static String |
+US_ENGLISH_MAPPING_STRING
+Mapping:
+ |
+
Constructor and Description | +
---|
RefinedSoundex()
+Creates an instance of the RefinedSoundex object using the default US
+ English mapping.
+ |
+
RefinedSoundex(char[] mapping)
+Creates a refined soundex instance using a custom mapping.
+ |
+
RefinedSoundex(String mapping)
+Creates a refined Soundex instance using a custom mapping.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
int |
+difference(String s1,
+ String s2)
+Returns the number of characters in the two encoded Strings that are the
+ same.
+ |
+
Object |
+encode(Object obj)
+Encodes an Object using the refined soundex algorithm.
+ |
+
String |
+encode(String str)
+Encodes a String using the refined soundex algorithm.
+ |
+
String |
+soundex(String str)
+Retrieves the Refined Soundex code for a given String object.
+ |
+
public static final String US_ENGLISH_MAPPING_STRING+
+ 0: A E I O U Y H W + 1: B P + 2: F V + 3: C K S + 4: G J + 5: Q X Z + 6: D T + 7: L + 8: M N + 9: R +
public static final RefinedSoundex US_ENGLISH+
public RefinedSoundex()+
public RefinedSoundex(char[] mapping)+
mapping
- Mapping array to use when finding the corresponding code for
+ a given characterpublic RefinedSoundex(String mapping)+
mapping
- Mapping string to use when finding the corresponding code for a given characterpublic int difference(String s1, + String s2) + throws EncoderException+
s1
- A String that will be encoded and compared.s2
- A String that will be encoded and compared.EncoderException
- if an error occurs encoding one of the stringsSoundexUtils.difference(StringEncoder,String,String)
,
+
+ MS T-SQL DIFFERENCEpublic Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.Stringpublic String encode(String str)+
encode
in interface StringEncoder
str
- A String object to encodeCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Soundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Soundex.html new file mode 100644 index 0000000..6ab1fb2 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/Soundex.html @@ -0,0 +1,681 @@ + + + + + + +public class Soundex +extends Object +implements StringEncoder+
maxLength
field is not actually used.Modifier and Type | +Field and Description | +
---|---|
static char |
+SILENT_MARKER
+The marker character used to indicate a silent (ignored) character.
+ |
+
static Soundex |
+US_ENGLISH
+An instance of Soundex using the US_ENGLISH_MAPPING mapping.
+ |
+
static Soundex |
+US_ENGLISH_GENEALOGY
+An instance of Soundex using the mapping as per the Genealogy site:
+ http://www.genealogy.com/articles/research/00000060.html
+ |
+
static String |
+US_ENGLISH_MAPPING_STRING
+This is a default mapping of the 26 letters used in US English.
+ |
+
static Soundex |
+US_ENGLISH_SIMPLIFIED
+An instance of Soundex using the Simplified Soundex mapping, as described here:
+ http://west-penwith.org.uk/misc/soundex.htm
+ |
+
Constructor and Description | +
---|
Soundex()
+Creates an instance using US_ENGLISH_MAPPING
+ |
+
Soundex(char[] mapping)
+Creates a soundex instance using the given mapping.
+ |
+
Soundex(String mapping)
+Creates a soundex instance using a custom mapping.
+ |
+
Soundex(String mapping,
+ boolean specialCaseHW)
+Creates a soundex instance using a custom mapping.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
int |
+difference(String s1,
+ String s2)
+Encodes the Strings and returns the number of characters in the two encoded Strings that are the same.
+ |
+
Object |
+encode(Object obj)
+Encodes an Object using the soundex algorithm.
+ |
+
String |
+encode(String str)
+Encodes a String using the soundex algorithm.
+ |
+
int |
+getMaxLength()
+Deprecated.
+
+This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
+ |
+
void |
+setMaxLength(int maxLength)
+Deprecated.
+
+This feature is not needed since the encoding size must be constant. Will be removed in 2.0.
+ |
+
String |
+soundex(String str)
+Retrieves the Soundex code for a given String object.
+ |
+
public static final char SILENT_MARKER+
+ Note: the US_ENGLISH_MAPPING_STRING
does not use this mechanism
+ because changing it might break existing code. Mappings that don't contain
+ a silent marker code are treated as though H and W are silent.
+
+ To override this, use the Soundex(String, boolean)
constructor.
public static final String US_ENGLISH_MAPPING_STRING+
0
for a letter position
+ means do not encode, but treat as a separator when it occurs between consonants with the same code.
+ + (This constant is provided as both an implementation convenience and to allow Javadoc to pick + up the value for the constant values page.) +
+ Note that letters H and W are treated specially. + They are ignored (after the first letter) and don't act as separators + between consonants with the same code.
US_ENGLISH_MAPPING
,
+Constant Field Valuespublic static final Soundex US_ENGLISH+
US_ENGLISH_MAPPING
,
+US_ENGLISH_MAPPING_STRING
public static final Soundex US_ENGLISH_SIMPLIFIED+
+ This treats H and W the same as vowels (AEIOUY).
+ Such letters aren't encoded (after the first), but they do
+ act as separators when dropping duplicate codes.
+ The mapping is otherwise the same as for US_ENGLISH
+
public static final Soundex US_ENGLISH_GENEALOGY+
+ This treats vowels (AEIOUY), H and W as silent letters. + Such letters are ignored (after the first) and do not + act as separators when dropping duplicate codes. +
+ The codes for consonants are otherwise the same as for
+ US_ENGLISH_MAPPING_STRING
and US_ENGLISH_SIMPLIFIED
public Soundex()+
Soundex(char[])
,
+US_ENGLISH_MAPPING
public Soundex(char[] mapping)+
+ If the mapping contains an instance of SILENT_MARKER
then H and W are not given special treatment
mapping
- Mapping array to use when finding the corresponding code for a given characterpublic Soundex(String mapping)+
+ If the mapping contains an instance of SILENT_MARKER
then H and W are not given special treatment
mapping
- Mapping string to use when finding the corresponding code for a given characterpublic Soundex(String mapping, + boolean specialCaseHW)+
mapping
- Mapping string to use when finding the corresponding code for a given characterspecialCaseHW
- if true, thenpublic int difference(String s1, + String s2) + throws EncoderException+
s1
- A String that will be encoded and compared.s2
- A String that will be encoded and compared.EncoderException
- if an error occurs encoding one of the stringsSoundexUtils.difference(StringEncoder,String,String)
,
+ MS
+ T-SQL DIFFERENCE public Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- Object to encodeEncoderException
- if the parameter supplied is not of type java.lang.StringIllegalArgumentException
- if a character is not mappedpublic String encode(String str)+
encode
in interface StringEncoder
str
- A String object to encodeIllegalArgumentException
- if a character is not mapped@Deprecated +public int getMaxLength()+
@Deprecated +public void setMaxLength(int maxLength)+
maxLength
- The maxLength to setpublic String soundex(String str)+
str
- String to encode using the Soundex algorithmIllegalArgumentException
- if a character is not mappedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/BeiderMorseEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/BeiderMorseEncoder.html new file mode 100644 index 0000000..8b06554 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/BeiderMorseEncoder.html @@ -0,0 +1,525 @@ + + + + + + +public class BeiderMorseEncoder +extends Object +implements StringEncoder+
+ Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range of + words. +
+ This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it
+ may not be thread-safe. If you require a guaranteed thread-safe encoding then use PhoneticEngine
+ directly.
+
+ Encoding overview +
+ Beider-Morse phonetic encoding is a multi-step process. Firstly, a table of rules is consulted to guess what
+ language the word comes from. For example, if it ends in "ault
" then it infers that the word is French.
+ Next, the word is translated into a phonetic representation using a language-specific phonetics table. Some runs of
+ letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up into phonemes at
+ different places, so this stage results in a set of possible language-specific phonetic representations. Lastly, this
+ language-specific phonetic representation is processed by a table of rules that re-writes it phonetically taking into
+ account systematic pronunciation differences between languages, to move it towards a pan-indo-european phonetic
+ representation. Again, sometimes there are multiple ways this could be done and sometimes things that can be
+ pronounced in several ways in the source language have only one way to represent them in this average phonetic
+ language, so the result is again a set of phonetic spellings.
+
+ Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated. In
+ this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final encoding.
+ Secondly, some names have standard prefixes, for example, "Mac/Mc
" in Scottish (English) names. As
+ sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word is encoded once
+ with the prefix and once without it. The resulting encoding contains one and then the other result.
+
+ Encoding format +
+ Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where there
+ are multiple possible phonetic representations, these are joined with a pipe (|
) character. If multiple
+ hyphenated words were found, or if the word may contain a name prefix, each encoded word is placed in parentheses and
+ these blocks are then joined with hyphens. For example, "d'ortley
" has a possible prefix. The form
+ without prefix encodes to "ortlaj|ortlej
", while the form with prefix encodes to "
+ dortlaj|dortlej
". Thus, the full, combined encoding is "(ortlaj|ortlej)-(dortlaj|dortlej)
".
+
+ The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many
+ potential phonetic interpretations. For example, "Renault
" encodes to "
+ rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult
". The APPROX
rules will tend to produce larger
+ encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word.
+ Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by
+ splitting on pipe (|
) and indexing under each of these alternatives.
+
+ Note: this version of the Beider-Morse encoding is equivalent with v3.4 of the reference implementation. +
+ This class is Not ThreadSafe +
Constructor and Description | +
---|
BeiderMorseEncoder() |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+encode(Object source)
+Encodes an "Object" and returns the encoded content as an Object.
+ |
+
String |
+encode(String source)
+Encodes a String and returns a String.
+ |
+
NameType |
+getNameType()
+Gets the name type currently in operation.
+ |
+
RuleType |
+getRuleType()
+Gets the rule type currently in operation.
+ |
+
boolean |
+isConcat()
+Discovers if multiple possible encodings are concatenated.
+ |
+
void |
+setConcat(boolean concat)
+Sets how multiple possible phonetic encodings are combined.
+ |
+
void |
+setMaxPhonemes(int maxPhonemes)
+Sets the maximum number of phonemes that shall be considered by the engine.
+ |
+
void |
+setNameType(NameType nameType)
+Sets the type of name.
+ |
+
void |
+setRuleType(RuleType ruleType)
+Sets the rule type to apply.
+ |
+
public Object encode(Object source) + throws EncoderException+
Encoder
byte[]
or String
s depending on the implementation used.encode
in interface Encoder
source
- An object to encodeEncoderException
- An encoder exception is thrown if the encoder experiences a failure condition during the encoding
+ process.public String encode(String source) + throws EncoderException+
StringEncoder
encode
in interface StringEncoder
source
- the String to encodeEncoderException
- thrown if there is an error condition during the encoding process.public NameType getNameType()+
public RuleType getRuleType()+
public boolean isConcat()+
public void setConcat(boolean concat)+
concat
- true if multiple encodings are to be combined with a '|', false if just the first one is
+ to be consideredpublic void setNameType(NameType nameType)+
NameType.GENERIC
unless you specifically want phonetic encodings
+ optimized for Ashkenazi or Sephardic Jewish family names.nameType
- the NameType in usepublic void setRuleType(RuleType ruleType)+
ruleType
- RuleType.APPROX
or RuleType.EXACT
for approximate or exact phonetic matchespublic void setMaxPhonemes(int maxPhonemes)+
maxPhonemes
- the maximum number of phonemes returned by the engineCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Lang.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Lang.html new file mode 100644 index 0000000..e71cc5b --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Lang.html @@ -0,0 +1,358 @@ + + + + + + +public class Lang +extends Object+
+ This class encapsulates rules used to guess the possible languages that a word originates from. This is + done by reference to a whole series of rules distributed in resource files. +
+ Instances of this class are typically managed through the static factory method instance(). + Unless you are developing your own language guessing rules, you will not need to interact with this class directly. +
+ This class is intended to be immutable and thread-safe. +
+ Lang resources +
+ Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files. + They are systematically named following the pattern: +
org/apache/commons/codec/language/bm/lang.txt+ The format of these resources is the following: +
+ Port of lang.php
Modifier and Type | +Method and Description | +
---|---|
String |
+guessLanguage(String text)
+Guesses the language of a word.
+ |
+
Languages.LanguageSet |
+guessLanguages(String input)
+Guesses the languages of a word.
+ |
+
static Lang |
+instance(NameType nameType)
+Gets a Lang instance for one of the supported NameTypes.
+ |
+
static Lang |
+loadFromResource(String languageRulesResourceName,
+ Languages languages)
+Loads language rules from a resource.
+ |
+
public static Lang instance(NameType nameType)+
nameType
- the NameType to look uppublic static Lang loadFromResource(String languageRulesResourceName, + Languages languages)+
+ In normal use, you will obtain instances of Lang through the instance(NameType)
method.
+ You will only need to call this yourself if you are developing custom language mapping rules.
languageRulesResourceName
- the fully-qualified resource name to loadlanguages
- the languages that these rules will supportpublic String guessLanguage(String text)+
text
- the wordLanguages.ANY
if there was no unique matchpublic Languages.LanguageSet guessLanguages(String input)+
input
- the wordCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.LanguageSet.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.LanguageSet.html new file mode 100644 index 0000000..6239e66 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.LanguageSet.html @@ -0,0 +1,346 @@ + + + + + + +public abstract static class Languages.LanguageSet +extends Object+
Constructor and Description | +
---|
LanguageSet() |
+
Modifier and Type | +Method and Description | +
---|---|
abstract boolean |
+contains(String language) |
+
static Languages.LanguageSet |
+from(Set<String> langs) |
+
abstract String |
+getAny() |
+
abstract boolean |
+isEmpty() |
+
abstract boolean |
+isSingleton() |
+
abstract Languages.LanguageSet |
+restrictTo(Languages.LanguageSet other) |
+
public static Languages.LanguageSet from(Set<String> langs)+
public abstract boolean contains(String language)+
public abstract String getAny()+
public abstract boolean isEmpty()+
public abstract boolean isSingleton()+
public abstract Languages.LanguageSet restrictTo(Languages.LanguageSet other)+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.SomeLanguages.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.SomeLanguages.html new file mode 100644 index 0000000..3695d7c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.SomeLanguages.html @@ -0,0 +1,370 @@ + + + + + + +public static final class Languages.SomeLanguages +extends Languages.LanguageSet+
Modifier and Type | +Method and Description | +
---|---|
boolean |
+contains(String language) |
+
String |
+getAny() |
+
Set<String> |
+getLanguages() |
+
boolean |
+isEmpty() |
+
boolean |
+isSingleton() |
+
Languages.LanguageSet |
+merge(Languages.LanguageSet other) |
+
Languages.LanguageSet |
+restrictTo(Languages.LanguageSet other) |
+
String |
+toString() |
+
from
public boolean contains(String language)+
contains
in class Languages.LanguageSet
public String getAny()+
getAny
in class Languages.LanguageSet
public boolean isEmpty()+
isEmpty
in class Languages.LanguageSet
public boolean isSingleton()+
isSingleton
in class Languages.LanguageSet
public Languages.LanguageSet restrictTo(Languages.LanguageSet other)+
restrictTo
in class Languages.LanguageSet
public Languages.LanguageSet merge(Languages.LanguageSet other)+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.html new file mode 100644 index 0000000..85592eb --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Languages.html @@ -0,0 +1,388 @@ + + + + + + +public class Languages +extends Object+
+ Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are + systematically named following the pattern: +
org/apache/commons/codec/language/bm/${NameType.getName()
}_languages.txt
+ + The format of these resources is the following: +
+ Ported from language.php +
+ This class is immutable and thread-safe.
Modifier and Type | +Class and Description | +
---|---|
static class |
+Languages.LanguageSet
+A set of languages.
+ |
+
static class |
+Languages.SomeLanguages
+Some languages, explicitly enumerated.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static String |
+ANY |
+
static Languages.LanguageSet |
+ANY_LANGUAGE
+Any/all languages.
+ |
+
static Languages.LanguageSet |
+NO_LANGUAGES
+No languages at all.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Languages |
+getInstance(NameType nameType) |
+
static Languages |
+getInstance(String languagesResourceName) |
+
Set<String> |
+getLanguages() |
+
public static final String ANY+
public static final Languages.LanguageSet NO_LANGUAGES+
public static final Languages.LanguageSet ANY_LANGUAGE+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/NameType.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/NameType.html new file mode 100644 index 0000000..797b507 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/NameType.html @@ -0,0 +1,392 @@ + + + + + + +public enum NameType +extends Enum<NameType>+
GENERIC
. The
+ GENERIC
NameType should work reasonably well for non-name words. The other encodings are
+ specifically tuned to family names, and may not work well at all for general text.Enum Constant and Description | +
---|
ASHKENAZI
+Ashkenazi family names
+ |
+
GENERIC
+Generic names and words
+ |
+
SEPHARDIC
+Sephardic family names
+ |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+getName()
+Gets the short version of the name type.
+ |
+
static NameType |
+valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static NameType[] |
+values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
public static final NameType ASHKENAZI+
public static final NameType GENERIC+
public static final NameType SEPHARDIC+
public static NameType[] values()+
+for (NameType c : NameType.values()) + System.out.println(c); +
public static NameType valueOf(String name)+
name
- the name of the enum constant to be returned.IllegalArgumentException
- if this enum type has no constant with the specified nameNullPointerException
- if the argument is nullpublic String getName()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/PhoneticEngine.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/PhoneticEngine.html new file mode 100644 index 0000000..4e50d7e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/PhoneticEngine.html @@ -0,0 +1,470 @@ + + + + + + +public class PhoneticEngine +extends Object+
+ This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes + into account the likely source language. Next, this phonetic representation is converted into a + pan-European 'average' representation, allowing comparison between different versions of essentially + the same word from different languages. +
+ This class is intentionally immutable and thread-safe. + If you wish to alter the settings for a PhoneticEngine, you + must make a new one with the updated settings. +
+ Ported from phoneticengine.php
Constructor and Description | +
---|
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat)
+Generates a new, fully-configured phonetic engine.
+ |
+
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat,
+ int maxPhonemes)
+Generates a new, fully-configured phonetic engine.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+encode(String input)
+Encodes a string to its phonetic representation.
+ |
+
String |
+encode(String input,
+ Languages.LanguageSet languageSet)
+Encodes an input string into an output phonetic representation, given a set of possible origin languages.
+ |
+
Lang |
+getLang()
+Gets the Lang language guessing rules being used.
+ |
+
int |
+getMaxPhonemes()
+Gets the maximum number of phonemes the engine will calculate for a given input.
+ |
+
NameType |
+getNameType()
+Gets the NameType being used.
+ |
+
RuleType |
+getRuleType()
+Gets the RuleType being used.
+ |
+
boolean |
+isConcat()
+Gets if multiple phonetic encodings are concatenated or if just the first one is kept.
+ |
+
public PhoneticEngine(NameType nameType, + RuleType ruleType, + boolean concat)+
nameType
- the type of names it will useruleType
- the type of rules it will applyconcat
- if it will concatenate multiple encodingspublic PhoneticEngine(NameType nameType, + RuleType ruleType, + boolean concat, + int maxPhonemes)+
nameType
- the type of names it will useruleType
- the type of rules it will applyconcat
- if it will concatenate multiple encodingsmaxPhonemes
- the maximum number of phonemes that will be handledpublic String encode(String input)+
input
- the String to encodepublic String encode(String input, + Languages.LanguageSet languageSet)+
input
- String to phoneticise; a String with dashes or spaces separating each wordlanguageSet
- set of possible origin languagespublic Lang getLang()+
public NameType getNameType()+
public RuleType getRuleType()+
public boolean isConcat()+
public int getMaxPhonemes()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.Phoneme.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.Phoneme.html new file mode 100644 index 0000000..9978661 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.Phoneme.html @@ -0,0 +1,459 @@ + + + + + + +public static final class Rule.Phoneme +extends Object +implements Rule.PhonemeExpr+
Modifier and Type | +Field and Description | +
---|---|
static Comparator<Rule.Phoneme> |
+COMPARATOR |
+
Constructor and Description | +
---|
Phoneme(CharSequence phonemeText,
+ Languages.LanguageSet languages) |
+
Phoneme(Rule.Phoneme phonemeLeft,
+ Rule.Phoneme phonemeRight) |
+
Phoneme(Rule.Phoneme phonemeLeft,
+ Rule.Phoneme phonemeRight,
+ Languages.LanguageSet languages) |
+
Modifier and Type | +Method and Description | +
---|---|
Rule.Phoneme |
+append(CharSequence str) |
+
Languages.LanguageSet |
+getLanguages() |
+
Iterable<Rule.Phoneme> |
+getPhonemes() |
+
CharSequence |
+getPhonemeText() |
+
Rule.Phoneme |
+join(Rule.Phoneme right)
+Deprecated.
+
+since 1.9
+ |
+
Rule.Phoneme |
+mergeWithLanguage(Languages.LanguageSet lang)
+Returns a new Phoneme with the same text but a union of its
+ current language set and the given one.
+ |
+
String |
+toString() |
+
public static final Comparator<Rule.Phoneme> COMPARATOR+
public Phoneme(CharSequence phonemeText, + Languages.LanguageSet languages)+
public Phoneme(Rule.Phoneme phonemeLeft, + Rule.Phoneme phonemeRight)+
public Phoneme(Rule.Phoneme phonemeLeft, + Rule.Phoneme phonemeRight, + Languages.LanguageSet languages)+
public Rule.Phoneme append(CharSequence str)+
public Languages.LanguageSet getLanguages()+
public Iterable<Rule.Phoneme> getPhonemes()+
getPhonemes
in interface Rule.PhonemeExpr
public CharSequence getPhonemeText()+
@Deprecated +public Rule.Phoneme join(Rule.Phoneme right)+
right
- the Phoneme to joinpublic Rule.Phoneme mergeWithLanguage(Languages.LanguageSet lang)+
lang
- the language set to mergeCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeExpr.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeExpr.html new file mode 100644 index 0000000..7382e15 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeExpr.html @@ -0,0 +1,230 @@ + + + + + + +public static interface Rule.PhonemeExpr
+Modifier and Type | +Method and Description | +
---|---|
Iterable<Rule.Phoneme> |
+getPhonemes() |
+
Iterable<Rule.Phoneme> getPhonemes()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeList.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeList.html new file mode 100644 index 0000000..13cfc82 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.PhonemeList.html @@ -0,0 +1,285 @@ + + + + + + +public static final class Rule.PhonemeList +extends Object +implements Rule.PhonemeExpr+
Constructor and Description | +
---|
PhonemeList(List<Rule.Phoneme> phonemes) |
+
Modifier and Type | +Method and Description | +
---|---|
List<Rule.Phoneme> |
+getPhonemes() |
+
public PhonemeList(List<Rule.Phoneme> phonemes)+
public List<Rule.Phoneme> getPhonemes()+
getPhonemes
in interface Rule.PhonemeExpr
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.RPattern.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.RPattern.html new file mode 100644 index 0000000..ea6f94e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.RPattern.html @@ -0,0 +1,227 @@ + + + + + + +public static interface Rule.RPattern
+Modifier and Type | +Method and Description | +
---|---|
boolean |
+isMatch(CharSequence input) |
+
boolean isMatch(CharSequence input)+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.html new file mode 100644 index 0000000..c64ab36 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/Rule.html @@ -0,0 +1,630 @@ + + + + + + +public class Rule +extends Object+
+ Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply + and a logical flag indicating if all languages must be in play. A rule matches if: +
+ Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user + to explicitly construct their own. +
+ Rules are immutable and thread-safe. +
+ Rules resources +
+ Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically + named following the pattern: +
org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt+
+ The format of these resources is the following: +
Modifier and Type | +Class and Description | +
---|---|
static class |
+Rule.Phoneme |
+
static interface |
+Rule.PhonemeExpr |
+
static class |
+Rule.PhonemeList |
+
static interface |
+Rule.RPattern
+A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static String |
+ALL |
+
static Rule.RPattern |
+ALL_STRINGS_RMATCHER |
+
Constructor and Description | +
---|
Rule(String pattern,
+ String lContext,
+ String rContext,
+ Rule.PhonemeExpr phoneme)
+Creates a new rule.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static List<Rule> |
+getInstance(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static List<Rule> |
+getInstance(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
static Map<String,List<Rule>> |
+getInstanceMap(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static Map<String,List<Rule>> |
+getInstanceMap(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
Rule.RPattern |
+getLContext()
+Gets the left context.
+ |
+
String |
+getPattern()
+Gets the pattern.
+ |
+
Rule.PhonemeExpr |
+getPhoneme()
+Gets the phoneme.
+ |
+
Rule.RPattern |
+getRContext()
+Gets the right context.
+ |
+
boolean |
+patternAndContextMatches(CharSequence input,
+ int i)
+Decides if the pattern and context match the input starting at a position.
+ |
+
public static final Rule.RPattern ALL_STRINGS_RMATCHER+
public static final String ALL+
public Rule(String pattern, + String lContext, + String rContext, + Rule.PhonemeExpr phoneme)+
pattern
- the patternlContext
- the left contextrContext
- the right contextphoneme
- the resulting phonemepublic static List<Rule> getInstance(NameType nameType, + RuleType rt, + Languages.LanguageSet langs)+
nameType
- the NameType to considerrt
- the RuleType to considerlangs
- the set of languages to considerpublic static List<Rule> getInstance(NameType nameType, + RuleType rt, + String lang)+
nameType
- the NameType to considerrt
- the RuleType to considerlang
- the language to considerpublic static Map<String,List<Rule>> getInstanceMap(NameType nameType, + RuleType rt, + Languages.LanguageSet langs)+
nameType
- the NameType to considerrt
- the RuleType to considerlangs
- the set of languages to considerpublic static Map<String,List<Rule>> getInstanceMap(NameType nameType, + RuleType rt, + String lang)+
nameType
- the NameType to considerrt
- the RuleType to considerlang
- the language to considerpublic Rule.RPattern getLContext()+
public String getPattern()+
public Rule.PhonemeExpr getPhoneme()+
public Rule.RPattern getRContext()+
public boolean patternAndContextMatches(CharSequence input, + int i)+
lContext
matches input
up to i
, pattern
matches at i and
+ rContext
matches from the end of the match of pattern
to the end of input
.input
- the input Stringi
- the int position within the inputCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/RuleType.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/RuleType.html new file mode 100644 index 0000000..83ab103 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/RuleType.html @@ -0,0 +1,390 @@ + + + + + + +public enum RuleType +extends Enum<RuleType>+
Enum Constant and Description | +
---|
APPROX
+Approximate rules, which will lead to the largest number of phonetic interpretations.
+ |
+
EXACT
+Exact rules, which will lead to a minimum number of phonetic interpretations.
+ |
+
RULES
+For internal use only.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+getName()
+Gets the rule name.
+ |
+
static RuleType |
+valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static RuleType[] |
+values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
public static final RuleType APPROX+
public static final RuleType EXACT+
public static final RuleType RULES+ +
public static RuleType[] values()+
+for (RuleType c : RuleType.values()) + System.out.println(c); +
public static RuleType valueOf(String name)+
name
- the name of the enum constant to be returned.IllegalArgumentException
- if this enum type has no constant with the specified nameNullPointerException
- if the argument is nullpublic String getName()+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/BeiderMorseEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/BeiderMorseEncoder.html new file mode 100644 index 0000000..2a85cb0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/BeiderMorseEncoder.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Lang.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Lang.html new file mode 100644 index 0000000..7117de1 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Lang.html @@ -0,0 +1,182 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Lang |
+PhoneticEngine.getLang()
+Gets the Lang language guessing rules being used.
+ |
+
static Lang |
+Lang.instance(NameType nameType)
+Gets a Lang instance for one of the supported NameTypes.
+ |
+
static Lang |
+Lang.loadFromResource(String languageRulesResourceName,
+ Languages languages)
+Loads language rules from a resource.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.LanguageSet.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.LanguageSet.html new file mode 100644 index 0000000..7bffa20 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.LanguageSet.html @@ -0,0 +1,293 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
static class |
+Languages.SomeLanguages
+Some languages, explicitly enumerated.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static Languages.LanguageSet |
+Languages.ANY_LANGUAGE
+Any/all languages.
+ |
+
static Languages.LanguageSet |
+Languages.NO_LANGUAGES
+No languages at all.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Languages.LanguageSet |
+Languages.LanguageSet.from(Set<String> langs) |
+
Languages.LanguageSet |
+Rule.Phoneme.getLanguages() |
+
Languages.LanguageSet |
+Lang.guessLanguages(String input)
+Guesses the languages of a word.
+ |
+
Languages.LanguageSet |
+Languages.SomeLanguages.merge(Languages.LanguageSet other) |
+
abstract Languages.LanguageSet |
+Languages.LanguageSet.restrictTo(Languages.LanguageSet other) |
+
Languages.LanguageSet |
+Languages.SomeLanguages.restrictTo(Languages.LanguageSet other) |
+
Modifier and Type | +Method and Description | +
---|---|
String |
+PhoneticEngine.encode(String input,
+ Languages.LanguageSet languageSet)
+Encodes an input string into an output phonetic representation, given a set of possible origin languages.
+ |
+
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
Languages.LanguageSet |
+Languages.SomeLanguages.merge(Languages.LanguageSet other) |
+
Rule.Phoneme |
+Rule.Phoneme.mergeWithLanguage(Languages.LanguageSet lang)
+Returns a new Phoneme with the same text but a union of its
+ current language set and the given one.
+ |
+
abstract Languages.LanguageSet |
+Languages.LanguageSet.restrictTo(Languages.LanguageSet other) |
+
Languages.LanguageSet |
+Languages.SomeLanguages.restrictTo(Languages.LanguageSet other) |
+
Constructor and Description | +
---|
Phoneme(CharSequence phonemeText,
+ Languages.LanguageSet languages) |
+
Phoneme(Rule.Phoneme phonemeLeft,
+ Rule.Phoneme phonemeRight,
+ Languages.LanguageSet languages) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.SomeLanguages.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.SomeLanguages.html new file mode 100644 index 0000000..30ab4e6 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.SomeLanguages.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.html new file mode 100644 index 0000000..075ce3d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Languages.html @@ -0,0 +1,187 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Languages |
+Languages.getInstance(NameType nameType) |
+
static Languages |
+Languages.getInstance(String languagesResourceName) |
+
Modifier and Type | +Method and Description | +
---|---|
static Lang |
+Lang.loadFromResource(String languageRulesResourceName,
+ Languages languages)
+Loads language rules from a resource.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/NameType.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/NameType.html new file mode 100644 index 0000000..d8207d6 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/NameType.html @@ -0,0 +1,268 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
NameType |
+PhoneticEngine.getNameType()
+Gets the NameType being used.
+ |
+
NameType |
+BeiderMorseEncoder.getNameType()
+Gets the name type currently in operation.
+ |
+
static NameType |
+NameType.valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static NameType[] |
+NameType.values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static Languages |
+Languages.getInstance(NameType nameType) |
+
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
static Lang |
+Lang.instance(NameType nameType)
+Gets a Lang instance for one of the supported NameTypes.
+ |
+
void |
+BeiderMorseEncoder.setNameType(NameType nameType)
+Sets the type of name.
+ |
+
Constructor and Description | +
---|
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat)
+Generates a new, fully-configured phonetic engine.
+ |
+
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat,
+ int maxPhonemes)
+Generates a new, fully-configured phonetic engine.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/PhoneticEngine.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/PhoneticEngine.html new file mode 100644 index 0000000..51a8d9b --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/PhoneticEngine.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.Phoneme.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.Phoneme.html new file mode 100644 index 0000000..f9bf891 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.Phoneme.html @@ -0,0 +1,261 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static Comparator<Rule.Phoneme> |
+Rule.Phoneme.COMPARATOR |
+
Modifier and Type | +Method and Description | +
---|---|
Rule.Phoneme |
+Rule.Phoneme.append(CharSequence str) |
+
Rule.Phoneme |
+Rule.Phoneme.join(Rule.Phoneme right)
+Deprecated.
+
+since 1.9
+ |
+
Rule.Phoneme |
+Rule.Phoneme.mergeWithLanguage(Languages.LanguageSet lang)
+Returns a new Phoneme with the same text but a union of its
+ current language set and the given one.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Iterable<Rule.Phoneme> |
+Rule.Phoneme.getPhonemes() |
+
Iterable<Rule.Phoneme> |
+Rule.PhonemeExpr.getPhonemes() |
+
List<Rule.Phoneme> |
+Rule.PhonemeList.getPhonemes() |
+
Modifier and Type | +Method and Description | +
---|---|
Rule.Phoneme |
+Rule.Phoneme.join(Rule.Phoneme right)
+Deprecated.
+
+since 1.9
+ |
+
Constructor and Description | +
---|
Phoneme(Rule.Phoneme phonemeLeft,
+ Rule.Phoneme phonemeRight) |
+
Phoneme(Rule.Phoneme phonemeLeft,
+ Rule.Phoneme phonemeRight,
+ Languages.LanguageSet languages) |
+
Constructor and Description | +
---|
PhonemeList(List<Rule.Phoneme> phonemes) |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeExpr.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeExpr.html new file mode 100644 index 0000000..13fbf29 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeExpr.html @@ -0,0 +1,202 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
static class |
+Rule.Phoneme |
+
static class |
+Rule.PhonemeList |
+
Modifier and Type | +Method and Description | +
---|---|
Rule.PhonemeExpr |
+Rule.getPhoneme()
+Gets the phoneme.
+ |
+
Constructor and Description | +
---|
Rule(String pattern,
+ String lContext,
+ String rContext,
+ Rule.PhonemeExpr phoneme)
+Creates a new rule.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeList.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeList.html new file mode 100644 index 0000000..8e0cd84 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.PhonemeList.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.RPattern.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.RPattern.html new file mode 100644 index 0000000..88cef2a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.RPattern.html @@ -0,0 +1,188 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static Rule.RPattern |
+Rule.ALL_STRINGS_RMATCHER |
+
Modifier and Type | +Method and Description | +
---|---|
Rule.RPattern |
+Rule.getLContext()
+Gets the left context.
+ |
+
Rule.RPattern |
+Rule.getRContext()
+Gets the right context.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.html new file mode 100644 index 0000000..14456bd --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/Rule.html @@ -0,0 +1,195 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/RuleType.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/RuleType.html new file mode 100644 index 0000000..04818d3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/class-use/RuleType.html @@ -0,0 +1,258 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
RuleType |
+PhoneticEngine.getRuleType()
+Gets the RuleType being used.
+ |
+
RuleType |
+BeiderMorseEncoder.getRuleType()
+Gets the rule type currently in operation.
+ |
+
static RuleType |
+RuleType.valueOf(String name)
+Returns the enum constant of this type with the specified name.
+ |
+
static RuleType[] |
+RuleType.values()
+Returns an array containing the constants of this enum type, in
+the order they are declared.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static List<Rule> |
+Rule.getInstance(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ Languages.LanguageSet langs)
+Gets rules for a combination of name type, rule type and languages.
+ |
+
static Map<String,List<Rule>> |
+Rule.getInstanceMap(NameType nameType,
+ RuleType rt,
+ String lang)
+Gets rules for a combination of name type, rule type and a single language.
+ |
+
void |
+BeiderMorseEncoder.setRuleType(RuleType ruleType)
+Sets the rule type to apply.
+ |
+
Constructor and Description | +
---|
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat)
+Generates a new, fully-configured phonetic engine.
+ |
+
PhoneticEngine(NameType nameType,
+ RuleType ruleType,
+ boolean concat,
+ int maxPhonemes)
+Generates a new, fully-configured phonetic engine.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-frame.html new file mode 100644 index 0000000..f907e9d --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-frame.html @@ -0,0 +1,38 @@ + + + + + + +See: Description
+Interface | +Description | +
---|---|
Rule.PhonemeExpr | ++ |
Rule.RPattern | +
+ A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
+ |
+
Class | +Description | +
---|---|
BeiderMorseEncoder | +
+ Encodes strings into their Beider-Morse phonetic encoding.
+ |
+
Lang | +
+ Language guessing utility.
+ |
+
Languages | +
+ Language codes.
+ |
+
Languages.LanguageSet | +
+ A set of languages.
+ |
+
Languages.SomeLanguages | +
+ Some languages, explicitly enumerated.
+ |
+
PhoneticEngine | +
+ Converts words into potential phonetic representations.
+ |
+
Rule | +
+ A phoneme rule.
+ |
+
Rule.Phoneme | ++ |
Rule.PhonemeList | ++ |
Enum | +Description | +
---|---|
NameType | +
+ Supported types of names.
+ |
+
RuleType | +
+ Types of rule.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-tree.html new file mode 100644 index 0000000..580a375 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-tree.html @@ -0,0 +1,167 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-use.html new file mode 100644 index 0000000..88be04a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/bm/package-use.html @@ -0,0 +1,198 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
Class and Description | +
---|
Lang
+ Language guessing utility.
+ |
+
Languages
+ Language codes.
+ |
+
Languages.LanguageSet
+ A set of languages.
+ |
+
NameType
+ Supported types of names.
+ |
+
Rule
+ A phoneme rule.
+ |
+
Rule.Phoneme | +
Rule.PhonemeExpr | +
Rule.RPattern
+ A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations.
+ |
+
RuleType
+ Types of rule.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/AbstractCaverphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/AbstractCaverphone.html new file mode 100644 index 0000000..160b05c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/AbstractCaverphone.html @@ -0,0 +1,175 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
Modifier and Type | +Class and Description | +
---|---|
class |
+Caverphone1
+Encodes a string into a Caverphone 1.0 value.
+ |
+
class |
+Caverphone2
+Encodes a string into a Caverphone 2.0 value.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone.html new file mode 100644 index 0000000..0dfbf75 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone1.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone1.html new file mode 100644 index 0000000..e8f5506 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone1.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone2.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone2.html new file mode 100644 index 0000000..8484249 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Caverphone2.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/ColognePhonetic.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/ColognePhonetic.html new file mode 100644 index 0000000..39193d3 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/ColognePhonetic.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DaitchMokotoffSoundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DaitchMokotoffSoundex.html new file mode 100644 index 0000000..aafa09b --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DaitchMokotoffSoundex.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.DoubleMetaphoneResult.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.DoubleMetaphoneResult.html new file mode 100644 index 0000000..322ebc0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.DoubleMetaphoneResult.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.html new file mode 100644 index 0000000..bdc4011 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/DoubleMetaphone.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/MatchRatingApproachEncoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/MatchRatingApproachEncoder.html new file mode 100644 index 0000000..901290e --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/MatchRatingApproachEncoder.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Metaphone.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Metaphone.html new file mode 100644 index 0000000..1e611ac --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Metaphone.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Nysiis.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Nysiis.html new file mode 100644 index 0000000..23a7c2c --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Nysiis.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/RefinedSoundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/RefinedSoundex.html new file mode 100644 index 0000000..30081f5 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/RefinedSoundex.html @@ -0,0 +1,170 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static RefinedSoundex |
+RefinedSoundex.US_ENGLISH
+This static variable contains an instance of the RefinedSoundex using
+ the US_ENGLISH mapping.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Soundex.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Soundex.html new file mode 100644 index 0000000..8119a61 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/class-use/Soundex.html @@ -0,0 +1,183 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
Modifier and Type | +Field and Description | +
---|---|
static Soundex |
+Soundex.US_ENGLISH
+An instance of Soundex using the US_ENGLISH_MAPPING mapping.
+ |
+
static Soundex |
+Soundex.US_ENGLISH_GENEALOGY
+An instance of Soundex using the mapping as per the Genealogy site:
+ http://www.genealogy.com/articles/research/00000060.html
+ |
+
static Soundex |
+Soundex.US_ENGLISH_SIMPLIFIED
+An instance of Soundex using the Simplified Soundex mapping, as described here:
+ http://west-penwith.org.uk/misc/soundex.htm
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-frame.html new file mode 100644 index 0000000..d6527f6 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-frame.html @@ -0,0 +1,31 @@ + + + + + + +Class | +Description | +
---|---|
AbstractCaverphone | +
+ Encodes a string into a Caverphone value.
+ |
+
Caverphone | +Deprecated
+ 1.5 Replaced by
+Caverphone2 , will be removed in 2.0. |
+
Caverphone1 | +
+ Encodes a string into a Caverphone 1.0 value.
+ |
+
Caverphone2 | +
+ Encodes a string into a Caverphone 2.0 value.
+ |
+
ColognePhonetic | +
+ Encodes a string into a Cologne Phonetic value.
+ |
+
DaitchMokotoffSoundex | +
+ Encodes a string into a Daitch-Mokotoff Soundex value.
+ |
+
DoubleMetaphone | +
+ Encodes a string into a double metaphone value.
+ |
+
MatchRatingApproachEncoder | +
+ Match Rating Approach Phonetic Algorithm Developed by Western Airlines in 1977.
+ |
+
Metaphone | +
+ Encodes a string into a Metaphone value.
+ |
+
Nysiis | +
+ Encodes a string into a NYSIIS value.
+ |
+
RefinedSoundex | +
+ Encodes a string into a Refined Soundex value.
+ |
+
Soundex | +
+ Encodes a string into a Soundex value.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-tree.html new file mode 100644 index 0000000..543914a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-tree.html @@ -0,0 +1,153 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-use.html new file mode 100644 index 0000000..9e6fcaf --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/language/package-use.html @@ -0,0 +1,172 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
Class and Description | +
---|
AbstractCaverphone
+ Encodes a string into a Caverphone value.
+ |
+
RefinedSoundex
+ Encodes a string into a Refined Soundex value.
+ |
+
Soundex
+ Encodes a string into a Soundex value.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/BCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/BCodec.html new file mode 100644 index 0000000..afb4afd --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/BCodec.html @@ -0,0 +1,788 @@ + + + + + + +public class BCodec +extends Object +implements StringEncoder, StringDecoder+
+ RFC 1522 describes techniques to allow the encoding of non-ASCII + text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message + handling software. +
+ This class is immutable and thread-safe.
Modifier and Type | +Field and Description | +
---|---|
protected static String |
+POSTFIX
+Postfix.
+ |
+
protected static String |
+PREFIX
+Prefix.
+ |
+
protected static char |
+SEP
+Separator.
+ |
+
Constructor and Description | +
---|
BCodec()
+Default constructor.
+ |
+
BCodec(Charset charset)
+Constructor which allows for the selection of a default Charset
+ |
+
BCodec(String charsetName)
+Constructor which allows for the selection of a default Charset
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+decode(Object value)
+Decodes a Base64 object into its original form.
+ |
+
String |
+decode(String value)
+Decodes a Base64 string into its original form.
+ |
+
protected String |
+decodeText(String text)
+Applies an RFC 1522 compliant decoding scheme to the given string of text.
+ |
+
protected byte[] |
+doDecoding(byte[] bytes)
+Decodes an array of bytes using the defined encoding scheme.
+ |
+
protected byte[] |
+doEncoding(byte[] bytes)
+Encodes an array of bytes using the defined encoding scheme.
+ |
+
Object |
+encode(Object value)
+Encodes an object into its Base64 form using the default Charset.
+ |
+
String |
+encode(String strSource)
+Encodes a string into its Base64 form using the default Charset.
+ |
+
String |
+encode(String strSource,
+ Charset sourceCharset)
+Encodes a string into its Base64 form using the specified Charset.
+ |
+
String |
+encode(String strSource,
+ String sourceCharset)
+Encodes a string into its Base64 form using the specified Charset.
+ |
+
protected String |
+encodeText(String text,
+ Charset charset)
+Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
+ |
+
protected String |
+encodeText(String text,
+ String charsetName)
+Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
+ |
+
Charset |
+getCharset()
+Gets the default Charset used for string decoding and encoding.
+ |
+
String |
+getDefaultCharset()
+Gets the default Charset name used for string decoding and encoding.
+ |
+
protected String |
+getEncoding()
+Returns the codec name (referred to as encoding in the RFC 1522).
+ |
+
protected static final char SEP+
protected static final String POSTFIX+
protected static final String PREFIX+
public BCodec()+
public BCodec(Charset charset)+
charset
- the default string Charset to use.public BCodec(String charsetName)+
charsetName
- the default Charset to use.UnsupportedCharsetException
- If the named Charset is unavailableprotected String getEncoding()+
protected byte[] doEncoding(byte[] bytes)+
bytes
- Data to be encodedprotected byte[] doDecoding(byte[] bytes)+
bytes
- Data to be decodedpublic String encode(String strSource, + Charset sourceCharset) + throws EncoderException+
strSource
- string to convert to Base64 formsourceCharset
- the Charset for strSource
EncoderException
- thrown if a failure condition is encountered during the encoding process.public String encode(String strSource, + String sourceCharset) + throws EncoderException+
strSource
- string to convert to Base64 formsourceCharset
- the Charset for strSource
EncoderException
- thrown if a failure condition is encountered during the encoding process.public String encode(String strSource) + throws EncoderException+
encode
in interface StringEncoder
strSource
- string to convert to Base64 formEncoderException
- thrown if a failure condition is encountered during the encoding process.public String decode(String value) + throws DecoderException+
decode
in interface StringDecoder
value
- Base64 string to convert into its original formDecoderException
- A decoder exception is thrown if a failure condition is encountered during the decode process.public Object encode(Object value) + throws EncoderException+
encode
in interface Encoder
value
- object to convert to Base64 formEncoderException
- thrown if a failure condition is encountered during the encoding process.public Object decode(Object value) + throws DecoderException+
decode
in interface Decoder
value
- Base64 object to convert into its original formDecoderException
- Thrown if the argument is not a String
. Thrown if a failure condition is encountered
+ during the decode process.public Charset getCharset()+
public String getDefaultCharset()+
protected String encodeText(String text, + Charset charset) + throws EncoderException+
+ This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doEncoding(byte [])
method of a concrete class to perform the specific encoding.
text
- a string to encodecharset
- a charset to be usedEncoderException
- thrown if there is an error condition during the Encoding process.protected String encodeText(String text, + String charsetName) + throws EncoderException, + UnsupportedEncodingException+
+ This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doEncoding(byte [])
method of a concrete class to perform the specific encoding.
text
- a string to encodecharsetName
- the charset to useEncoderException
- thrown if there is an error condition during the Encoding process.UnsupportedEncodingException
- if charset is not availableprotected String decodeText(String text) + throws DecoderException, + UnsupportedEncodingException+
+ This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doDecoding(byte [])
method of a concrete class to perform the specific decoding.
text
- a string to decodenull
if the input is null
.DecoderException
- thrown if there is an error condition during the decoding process.UnsupportedEncodingException
- thrown if charset specified in the "encoded-word" header is not supportedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/PercentCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/PercentCodec.html new file mode 100644 index 0000000..9e5cc19 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/PercentCodec.html @@ -0,0 +1,419 @@ + + + + + + +public class PercentCodec +extends Object +implements BinaryEncoder, BinaryDecoder+
+ This class is immutable. It is also thread-safe: although it uses BitSet, which is not thread-safe, its public + interface only calls the BitSet's access (read-only) methods. +
Constructor and Description | +
---|
PercentCodec()
+Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
+ while it will not encode any of the US-ASCII characters, except for character '%' that is used as escape
+ character for Percent-Encoding.
+ |
+
PercentCodec(byte[] alwaysEncodeChars,
+ boolean plusForSpace)
+Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
+ always be encoded.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] bytes)
+Decode bytes encoded with Percent-Encoding based on RFC 3986.
+ |
+
Object |
+decode(Object obj)
+Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
+ |
+
byte[] |
+encode(byte[] bytes)
+Percent-Encoding based on RFC 3986.
+ |
+
Object |
+encode(Object obj)
+Encodes an object using the Percent-Encoding.
+ |
+
public PercentCodec()+
public PercentCodec(byte[] alwaysEncodeChars, + boolean plusForSpace)+
alwaysEncodeChars
- the unsafe characters that should always be encodedplusForSpace
- the flag defining if the space character should be encoded as '+'public byte[] encode(byte[] bytes) + throws EncoderException+
encode
in interface BinaryEncoder
bytes
- Data to be encodedEncoderException
- thrown if the Encoder encounters a failure condition during the encoding process.public byte[] decode(byte[] bytes) + throws DecoderException+
decode
in interface BinaryDecoder
bytes
- A byte array which has been encoded with the appropriate encoderDecoderException
- A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.public Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- the object to encodeEncoderException
- if the object is not a byte arraypublic Object decode(Object obj) + throws DecoderException+
decode
in interface Decoder
obj
- the object to decodeDecoderException
- if the object is not a byte arrayCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QCodec.html new file mode 100644 index 0000000..db5c4ab --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QCodec.html @@ -0,0 +1,837 @@ + + + + + + +public class QCodec +extends Object +implements StringEncoder, StringDecoder+
+ RFC 1522 describes techniques to allow the encoding of non-ASCII + text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message + handling software. +
+ This class is conditionally thread-safe.
+ The instance field encodeBlanks
is mutable (see setEncodeBlanks(boolean))
+ but is not volatile, and accesses are not synchronised.
+ If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
+ is used to ensure safe publication of the value between threads, and must not invoke
+ setEncodeBlanks(boolean)
after initial setup.
Modifier and Type | +Field and Description | +
---|---|
protected static String |
+POSTFIX
+Postfix.
+ |
+
protected static String |
+PREFIX
+Prefix.
+ |
+
protected static char |
+SEP
+Separator.
+ |
+
Constructor and Description | +
---|
QCodec()
+Default constructor.
+ |
+
QCodec(Charset charset)
+Constructor which allows for the selection of a default Charset.
+ |
+
QCodec(String charsetName)
+Constructor which allows for the selection of a default Charset.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
Object |
+decode(Object obj)
+Decodes a quoted-printable object into its original form.
+ |
+
String |
+decode(String str)
+Decodes a quoted-printable string into its original form.
+ |
+
protected String |
+decodeText(String text)
+Applies an RFC 1522 compliant decoding scheme to the given string of text.
+ |
+
protected byte[] |
+doDecoding(byte[] bytes)
+Decodes an array of bytes using the defined encoding scheme.
+ |
+
protected byte[] |
+doEncoding(byte[] bytes)
+Encodes an array of bytes using the defined encoding scheme.
+ |
+
Object |
+encode(Object obj)
+Encodes an object into its quoted-printable form using the default Charset.
+ |
+
String |
+encode(String sourceStr)
+Encodes a string into its quoted-printable form using the default Charset.
+ |
+
String |
+encode(String sourceStr,
+ Charset sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
String |
+encode(String sourceStr,
+ String sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
protected String |
+encodeText(String text,
+ Charset charset)
+Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
+ |
+
protected String |
+encodeText(String text,
+ String charsetName)
+Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
+ |
+
Charset |
+getCharset()
+Gets the default Charset used for string decoding and encoding.
+ |
+
String |
+getDefaultCharset()
+Gets the default Charset name used for string decoding and encoding.
+ |
+
protected String |
+getEncoding()
+Returns the codec name (referred to as encoding in the RFC 1522).
+ |
+
boolean |
+isEncodeBlanks()
+Tests if optional transformation of SPACE characters is to be used
+ |
+
void |
+setEncodeBlanks(boolean b)
+Defines whether optional transformation of SPACE characters is to be used
+ |
+
protected static final char SEP+
protected static final String POSTFIX+
protected static final String PREFIX+
public QCodec()+
public QCodec(Charset charset)+
charset
- the default string Charset to use.public QCodec(String charsetName)+
charsetName
- the Charset to use.UnsupportedCharsetException
- If the named Charset is unavailableprotected String getEncoding()+
protected byte[] doEncoding(byte[] bytes)+
bytes
- Data to be encodedprotected byte[] doDecoding(byte[] bytes) + throws DecoderException+
bytes
- Data to be decodedDecoderException
- A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.public String encode(String sourceStr, + Charset sourceCharset) + throws EncoderException+
sourceStr
- string to convert to quoted-printable formsourceCharset
- the Charset for sourceStrEncoderException
- thrown if a failure condition is encountered during the encoding process.public String encode(String sourceStr, + String sourceCharset) + throws EncoderException+
sourceStr
- string to convert to quoted-printable formsourceCharset
- the Charset for sourceStrEncoderException
- thrown if a failure condition is encountered during the encoding process.public String encode(String sourceStr) + throws EncoderException+
encode
in interface StringEncoder
sourceStr
- string to convert to quoted-printable formEncoderException
- thrown if a failure condition is encountered during the encoding process.public String decode(String str) + throws DecoderException+
decode
in interface StringDecoder
str
- quoted-printable string to convert into its original formDecoderException
- A decoder exception is thrown if a failure condition is encountered during the decode process.public Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- object to convert to quoted-printable formEncoderException
- thrown if a failure condition is encountered during the encoding process.public Object decode(Object obj) + throws DecoderException+
decode
in interface Decoder
obj
- quoted-printable object to convert into its original formDecoderException
- Thrown if the argument is not a String
. Thrown if a failure condition is encountered
+ during the decode process.public Charset getCharset()+
public String getDefaultCharset()+
public boolean isEncodeBlanks()+
true
if SPACE characters are to be transformed, false
otherwisepublic void setEncodeBlanks(boolean b)+
b
- true
if SPACE characters are to be transformed, false
otherwiseprotected String encodeText(String text, + Charset charset) + throws EncoderException+
+ This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doEncoding(byte [])
method of a concrete class to perform the specific encoding.
text
- a string to encodecharset
- a charset to be usedEncoderException
- thrown if there is an error condition during the Encoding process.protected String encodeText(String text, + String charsetName) + throws EncoderException, + UnsupportedEncodingException+
+ This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doEncoding(byte [])
method of a concrete class to perform the specific encoding.
text
- a string to encodecharsetName
- the charset to useEncoderException
- thrown if there is an error condition during the Encoding process.UnsupportedEncodingException
- if charset is not availableprotected String decodeText(String text) + throws DecoderException, + UnsupportedEncodingException+
+ This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
+ doDecoding(byte [])
method of a concrete class to perform the specific decoding.
text
- a string to decodenull
if the input is null
.DecoderException
- thrown if there is an error condition during the decoding process.UnsupportedEncodingException
- thrown if charset specified in the "encoded-word" header is not supportedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QuotedPrintableCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QuotedPrintableCodec.html new file mode 100644 index 0000000..198ecb0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/QuotedPrintableCodec.html @@ -0,0 +1,830 @@ + + + + + + +public class QuotedPrintableCodec +extends Object +implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder+
+ The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to + printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are + unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the + data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable + to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping + gateway. +
+ Note: +
+ Depending on the selected strict
parameter, this class will implement a different set of rules of the
+ quoted-printable spec:
+
strict=false
: only rules #1 and #2 are implemented
+ strict=true
: all rules #1 through #5 are implemented
+ + This class is immutable and thread-safe.
Constructor and Description | +
---|
QuotedPrintableCodec()
+Default constructor, assumes default Charset of
+Charsets.UTF_8 |
+
QuotedPrintableCodec(boolean strict)
+Constructor which allows for the selection of the strict mode.
+ |
+
QuotedPrintableCodec(Charset charset)
+Constructor which allows for the selection of a default Charset.
+ |
+
QuotedPrintableCodec(Charset charset,
+ boolean strict)
+Constructor which allows for the selection of a default Charset and strict mode.
+ |
+
QuotedPrintableCodec(String charsetName)
+Constructor which allows for the selection of a default Charset.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] bytes)
+Decodes an array of quoted-printable characters into an array of original bytes.
+ |
+
Object |
+decode(Object obj)
+Decodes a quoted-printable object into its original form.
+ |
+
String |
+decode(String sourceStr)
+Decodes a quoted-printable string into its original form using the default string Charset.
+ |
+
String |
+decode(String sourceStr,
+ Charset sourceCharset)
+Decodes a quoted-printable string into its original form using the specified string Charset.
+ |
+
String |
+decode(String sourceStr,
+ String sourceCharset)
+Decodes a quoted-printable string into its original form using the specified string Charset.
+ |
+
static byte[] |
+decodeQuotedPrintable(byte[] bytes)
+Decodes an array of quoted-printable characters into an array of original bytes.
+ |
+
byte[] |
+encode(byte[] bytes)
+Encodes an array of bytes into an array of quoted-printable 7-bit characters.
+ |
+
Object |
+encode(Object obj)
+Encodes an object into its quoted-printable safe form.
+ |
+
String |
+encode(String sourceStr)
+Encodes a string into its quoted-printable form using the default string Charset.
+ |
+
String |
+encode(String sourceStr,
+ Charset sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
String |
+encode(String sourceStr,
+ String sourceCharset)
+Encodes a string into its quoted-printable form using the specified Charset.
+ |
+
static byte[] |
+encodeQuotedPrintable(BitSet printable,
+ byte[] bytes)
+Encodes an array of bytes into an array of quoted-printable 7-bit characters.
+ |
+
static byte[] |
+encodeQuotedPrintable(BitSet printable,
+ byte[] bytes,
+ boolean strict)
+Encodes an array of bytes into an array of quoted-printable 7-bit characters.
+ |
+
Charset |
+getCharset()
+Gets the default Charset used for string decoding and encoding.
+ |
+
String |
+getDefaultCharset()
+Gets the default Charset name used for string decoding and encoding.
+ |
+
public QuotedPrintableCodec()+
Charsets.UTF_8
public QuotedPrintableCodec(boolean strict)+
strict
- if true
, soft line breaks will be usedpublic QuotedPrintableCodec(Charset charset)+
charset
- the default string Charset to use.public QuotedPrintableCodec(Charset charset, + boolean strict)+
charset
- the default string Charset to use.strict
- if true
, soft line breaks will be usedpublic QuotedPrintableCodec(String charsetName) + throws IllegalCharsetNameException, + IllegalArgumentException, + UnsupportedCharsetException+
charsetName
- the default string Charset to use.UnsupportedCharsetException
- If no support for the named Charset is available
+ in this instance of the Java virtual machineIllegalArgumentException
- If the given charsetName is nullIllegalCharsetNameException
- If the given Charset name is illegalpublic static final byte[] encodeQuotedPrintable(BitSet printable, + byte[] bytes)+
+ This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in + RFC 1521 and is suitable for encoding binary data and unformatted text.
printable
- bitset of characters deemed quoted-printablebytes
- array of bytes to be encodedpublic static final byte[] encodeQuotedPrintable(BitSet printable, + byte[] bytes, + boolean strict)+
+ Depending on the selection of the strict
parameter, this function either implements the full ruleset
+ or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
+ RFC 1521 and is suitable for encoding binary data and unformatted text.
printable
- bitset of characters deemed quoted-printablebytes
- array of bytes to be encodedstrict
- if true
the full ruleset is used, otherwise only rule #1 and rule #2public static final byte[] decodeQuotedPrintable(byte[] bytes) + throws DecoderException+
+ This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as + defined in RFC 1521.
bytes
- array of quoted-printable charactersDecoderException
- Thrown if quoted-printable decoding is unsuccessfulpublic byte[] encode(byte[] bytes)+
+ Depending on the selection of the strict
parameter, this function either implements the full ruleset
+ or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
+ RFC 1521 and is suitable for encoding binary data and unformatted text.
encode
in interface BinaryEncoder
bytes
- array of bytes to be encodedpublic byte[] decode(byte[] bytes) + throws DecoderException+
+ This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as + defined in RFC 1521.
decode
in interface BinaryDecoder
bytes
- array of quoted-printable charactersDecoderException
- Thrown if quoted-printable decoding is unsuccessfulpublic String encode(String sourceStr) + throws EncoderException+
+ Depending on the selection of the strict
parameter, this function either implements the full ruleset
+ or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
+ RFC 1521 and is suitable for encoding binary data and unformatted text.
encode
in interface StringEncoder
sourceStr
- string to convert to quoted-printable formEncoderException
- Thrown if quoted-printable encoding is unsuccessfulgetCharset()
public String decode(String sourceStr, + Charset sourceCharset) + throws DecoderException+
sourceStr
- quoted-printable string to convert into its original formsourceCharset
- the original string CharsetDecoderException
- Thrown if quoted-printable decoding is unsuccessfulpublic String decode(String sourceStr, + String sourceCharset) + throws DecoderException, + UnsupportedEncodingException+
sourceStr
- quoted-printable string to convert into its original formsourceCharset
- the original string CharsetDecoderException
- Thrown if quoted-printable decoding is unsuccessfulUnsupportedEncodingException
- Thrown if Charset is not supportedpublic String decode(String sourceStr) + throws DecoderException+
decode
in interface StringDecoder
sourceStr
- quoted-printable string to convert into its original formDecoderException
- Thrown if quoted-printable decoding is unsuccessful. Thrown if Charset is not supported.getCharset()
public Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- string to convert to a quoted-printable formEncoderException
- Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
+ unsuccessfulpublic Object decode(Object obj) + throws DecoderException+
decode
in interface Decoder
obj
- quoted-printable object to convert into its original formDecoderException
- Thrown if the argument is not a String
or byte[]
. Thrown if a failure
+ condition is encountered during the decode process.public Charset getCharset()+
public String getDefaultCharset()+
public String encode(String sourceStr, + Charset sourceCharset)+
+ Depending on the selection of the strict
parameter, this function either implements the full ruleset
+ or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
+ RFC 1521 and is suitable for encoding binary data and unformatted text.
sourceStr
- string to convert to quoted-printable formsourceCharset
- the Charset for sourceStrpublic String encode(String sourceStr, + String sourceCharset) + throws UnsupportedEncodingException+
+ Depending on the selection of the strict
parameter, this function either implements the full ruleset
+ or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
+ RFC 1521 and is suitable for encoding binary data and unformatted text.
sourceStr
- string to convert to quoted-printable formsourceCharset
- the Charset for sourceStrUnsupportedEncodingException
- Thrown if the Charset is not supportedCopyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/URLCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/URLCodec.html new file mode 100644 index 0000000..add19ff --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/URLCodec.html @@ -0,0 +1,708 @@ + + + + + + +public class URLCodec +extends Object +implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder+
+ This codec is meant to be a replacement for standard Java classes URLEncoder
and
+ URLDecoder
on older Java platforms, as these classes in Java versions below
+ 1.4 rely on the platform's default charset encoding.
+
+ This class is thread-safe since 1.11
Modifier and Type | +Field and Description | +
---|---|
protected String |
+charset
+Deprecated.
+
+TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126)
+ |
+
protected static byte |
+ESCAPE_CHAR
+Release 1.5 made this field final.
+ |
+
protected static BitSet |
+WWW_FORM_URL
+Deprecated.
+
+Deprecated since 1.11; will be removed in 2.0 (CODEC-230)
+ |
+
Constructor and Description | +
---|
URLCodec()
+Default constructor.
+ |
+
URLCodec(String charset)
+Constructor which allows for the selection of a default charset.
+ |
+
Modifier and Type | +Method and Description | +
---|---|
byte[] |
+decode(byte[] bytes)
+Decodes an array of URL safe 7-bit characters into an array of original bytes.
+ |
+
Object |
+decode(Object obj)
+Decodes a URL safe object into its original form.
+ |
+
String |
+decode(String str)
+Decodes a URL safe string into its original form using the default string charset.
+ |
+
String |
+decode(String str,
+ String charsetName)
+Decodes a URL safe string into its original form using the specified encoding.
+ |
+
static byte[] |
+decodeUrl(byte[] bytes)
+Decodes an array of URL safe 7-bit characters into an array of original bytes.
+ |
+
byte[] |
+encode(byte[] bytes)
+Encodes an array of bytes into an array of URL safe 7-bit characters.
+ |
+
Object |
+encode(Object obj)
+Encodes an object into its URL safe form.
+ |
+
String |
+encode(String str)
+Encodes a string into its URL safe form using the default string charset.
+ |
+
String |
+encode(String str,
+ String charsetName)
+Encodes a string into its URL safe form using the specified string charset.
+ |
+
static byte[] |
+encodeUrl(BitSet urlsafe,
+ byte[] bytes)
+Encodes an array of bytes into an array of URL safe 7-bit characters.
+ |
+
String |
+getDefaultCharset()
+The default charset used for string decoding and encoding.
+ |
+
String |
+getEncoding()
+Deprecated.
+
+Use
+getDefaultCharset() , will be removed in 2.0. |
+
@Deprecated +protected volatile String charset+
protected static final byte ESCAPE_CHAR+
@Deprecated +protected static final BitSet WWW_FORM_URL+
public URLCodec()+
public URLCodec(String charset)+
charset
- the default string charset to use.public static final byte[] encodeUrl(BitSet urlsafe, + byte[] bytes)+
urlsafe
- bitset of characters deemed URL safebytes
- array of bytes to convert to URL safe characterspublic static final byte[] decodeUrl(byte[] bytes) + throws DecoderException+
bytes
- array of URL safe charactersDecoderException
- Thrown if URL decoding is unsuccessfulpublic byte[] encode(byte[] bytes)+
encode
in interface BinaryEncoder
bytes
- array of bytes to convert to URL safe characterspublic byte[] decode(byte[] bytes) + throws DecoderException+
decode
in interface BinaryDecoder
bytes
- array of URL safe charactersDecoderException
- Thrown if URL decoding is unsuccessfulpublic String encode(String str, + String charsetName) + throws UnsupportedEncodingException+
str
- string to convert to a URL safe formcharsetName
- the charset for strUnsupportedEncodingException
- Thrown if charset is not supportedpublic String encode(String str) + throws EncoderException+
encode
in interface StringEncoder
str
- string to convert to a URL safe formEncoderException
- Thrown if URL encoding is unsuccessfulgetDefaultCharset()
public String decode(String str, + String charsetName) + throws DecoderException, + UnsupportedEncodingException+
str
- URL safe string to convert into its original formcharsetName
- the original string charsetDecoderException
- Thrown if URL decoding is unsuccessfulUnsupportedEncodingException
- Thrown if charset is not supportedpublic String decode(String str) + throws DecoderException+
decode
in interface StringDecoder
str
- URL safe string to convert into its original formDecoderException
- Thrown if URL decoding is unsuccessfulgetDefaultCharset()
public Object encode(Object obj) + throws EncoderException+
encode
in interface Encoder
obj
- string to convert to a URL safe formEncoderException
- Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessfulpublic Object decode(Object obj) + throws DecoderException+
decode
in interface Decoder
obj
- URL safe object to convert into its original formDecoderException
- Thrown if the argument is not a String
or byte[]
. Thrown if a failure
+ condition is encountered during the decode process.public String getDefaultCharset()+
@Deprecated +public String getEncoding()+
getDefaultCharset()
, will be removed in 2.0.String
encoding used for decoding and encoding.Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/BCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/BCodec.html new file mode 100644 index 0000000..a3e32c0 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/BCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/PercentCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/PercentCodec.html new file mode 100644 index 0000000..06ff83f --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/PercentCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QCodec.html new file mode 100644 index 0000000..4822874 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QuotedPrintableCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QuotedPrintableCodec.html new file mode 100644 index 0000000..e5f09fe --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/QuotedPrintableCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/URLCodec.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/URLCodec.html new file mode 100644 index 0000000..052e828 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/class-use/URLCodec.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-frame.html new file mode 100644 index 0000000..8c08646 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-frame.html @@ -0,0 +1,24 @@ + + + + + + +See: Description
+Class | +Description | +
---|---|
BCodec | +
+ Identical to the Base64 encoding defined by RFC 1521
+ and allows a character set to be specified.
+ |
+
PercentCodec | +
+ Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification.
+ |
+
QCodec | +
+ Similar to the Quoted-Printable content-transfer-encoding defined in
+ RFC 1521 and designed to allow text containing mostly ASCII
+ characters to be decipherable on an ASCII terminal without decoding.
+ |
+
QuotedPrintableCodec | +
+ Codec for the Quoted-Printable section of RFC 1521.
+ |
+
URLCodec | +
+ Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
+ |
+
+ Network related encoding and decoding. +
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-tree.html new file mode 100644 index 0000000..d5bb87f --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-tree.html @@ -0,0 +1,142 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-use.html new file mode 100644 index 0000000..f0c6a76 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/net/package-use.html @@ -0,0 +1,125 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-frame.html new file mode 100644 index 0000000..cd402ac --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-frame.html @@ -0,0 +1,36 @@ + + + + + + +See: Description
+Interface | +Description | +
---|---|
BinaryDecoder | +
+ Defines common decoding methods for byte array decoders.
+ |
+
BinaryEncoder | +
+ Defines common encoding methods for byte array encoders.
+ |
+
Decoder | +
+ Provides the highest level of abstraction for Decoders.
+ |
+
Encoder | +
+ Provides the highest level of abstraction for Encoders.
+ |
+
StringDecoder | +
+ Defines common decoding methods for String decoders.
+ |
+
StringEncoder | +
+ Defines common encoding methods for String encoders.
+ |
+
Class | +Description | +
---|---|
CharEncoding | +
+ Character encoding names required of every implementation of the Java platform.
+ |
+
Charsets | +
+ Charsets required of every implementation of the Java platform.
+ |
+
StringEncoderComparator | +
+ Compares Strings using a
+StringEncoder . |
+
Exception | +Description | +
---|---|
DecoderException | +
+ Thrown when there is a failure condition during the decoding process.
+ |
+
EncoderException | +
+ Thrown when there is a failure condition during the encoding process.
+ |
+
Interfaces and classes used by + the various implementations in the sub-packages.
+ +Definitive implementations of commonly used encoders and decoders.
+ +Codec is currently comprised of a modest set of utilities and a + simple framework for String encoding and decoding in three categories: + Binary Encoders, Language Encoders, and Network Encoders.
+ ++ + org.apache.commons.codec.binary.Base64 + | ++ Provides Base64 content-transfer-encoding as defined in + RFC 2045 + | +Production | +
+ + org.apache.commons.codec.binary.Hex + | ++ Converts an array of bytes into an array of characters + representing the hexadecimal values of each byte in order + | +Production | +
+ Codec contains a number of commonly used language and phonetic + encoders +
++ org.apache.commons.codec.language.Soundex + | +Implementation of the Soundex algorithm. | +Production | +
+ org.apache.commons.codec.language.Metaphone + | +Implementation of the Metaphone algorithm. | +Production | +
Codec contains network related encoders
++ org.apache.commons.codec.net.URLCodec + | +Implements the 'www-form-urlencoded' encoding scheme. | +Production | +
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-tree.html new file mode 100644 index 0000000..880ad29 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-tree.html @@ -0,0 +1,165 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-use.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-use.html new file mode 100644 index 0000000..1ca8f66 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/org/apache/commons/codec/package-use.html @@ -0,0 +1,356 @@ + + + + + + +Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Class and Description | +
---|
Decoder
+ Provides the highest level of abstraction for Decoders.
+ |
+
DecoderException
+ Thrown when there is a failure condition during the decoding process.
+ |
+
Encoder
+ Provides the highest level of abstraction for Encoders.
+ |
+
EncoderException
+ Thrown when there is a failure condition during the encoding process.
+ |
+
StringEncoder
+ Defines common encoding methods for String encoders.
+ |
+
Class and Description | +
---|
BinaryDecoder
+ Defines common decoding methods for byte array decoders.
+ |
+
BinaryEncoder
+ Defines common encoding methods for byte array encoders.
+ |
+
Decoder
+ Provides the highest level of abstraction for Decoders.
+ |
+
DecoderException
+ Thrown when there is a failure condition during the decoding process.
+ |
+
Encoder
+ Provides the highest level of abstraction for Encoders.
+ |
+
EncoderException
+ Thrown when there is a failure condition during the encoding process.
+ |
+
Class and Description | +
---|
Encoder
+ Provides the highest level of abstraction for Encoders.
+ |
+
EncoderException
+ Thrown when there is a failure condition during the encoding process.
+ |
+
StringEncoder
+ Defines common encoding methods for String encoders.
+ |
+
Class and Description | +
---|
Encoder
+ Provides the highest level of abstraction for Encoders.
+ |
+
EncoderException
+ Thrown when there is a failure condition during the encoding process.
+ |
+
StringEncoder
+ Defines common encoding methods for String encoders.
+ |
+
Class and Description | +
---|
BinaryDecoder
+ Defines common decoding methods for byte array decoders.
+ |
+
BinaryEncoder
+ Defines common encoding methods for byte array encoders.
+ |
+
Decoder
+ Provides the highest level of abstraction for Decoders.
+ |
+
DecoderException
+ Thrown when there is a failure condition during the decoding process.
+ |
+
Encoder
+ Provides the highest level of abstraction for Encoders.
+ |
+
EncoderException
+ Thrown when there is a failure condition during the encoding process.
+ |
+
StringDecoder
+ Defines common decoding methods for String decoders.
+ |
+
StringEncoder
+ Defines common encoding methods for String encoders.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-frame.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-frame.html new file mode 100644 index 0000000..04f051a --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-frame.html @@ -0,0 +1,27 @@ + + + + + + ++ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-summary.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-summary.html new file mode 100644 index 0000000..fb8e477 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-summary.html @@ -0,0 +1,180 @@ + + + + + + +
Package | +Description | +
---|---|
org.apache.commons.codec | +
+ Interfaces and classes used by
+ the various implementations in the sub-packages.
+ |
+
org.apache.commons.codec.binary | +
+ Base64, Base32, Binary, and Hexadecimal String encoding and decoding.
+ |
+
org.apache.commons.codec.cli | ++ |
org.apache.commons.codec.digest | +
+ Simplifies common
+MessageDigest tasks and
+ includes a libc crypt(3) compatible crypt method that supports DES,
+ MD5, SHA-256 and SHA-512 based algorithms as well as the Apache
+ specific "$apr1$" variant. |
+
org.apache.commons.codec.language | +
+ Language and phonetic encoders.
+ |
+
org.apache.commons.codec.language.bm | +
+ Implementation details of the Beider-Morse codec.
+ |
+
org.apache.commons.codec.net | +
+
+ Network related encoding and decoding.
+ |
+
Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-tree.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-tree.html new file mode 100644 index 0000000..989a511 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/overview-tree.html @@ -0,0 +1,269 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/package-list b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/package-list new file mode 100644 index 0000000..8b23b44 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/package-list @@ -0,0 +1,7 @@ +org.apache.commons.codec +org.apache.commons.codec.binary +org.apache.commons.codec.cli +org.apache.commons.codec.digest +org.apache.commons.codec.language +org.apache.commons.codec.language.bm +org.apache.commons.codec.net diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/script.js b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/script.js new file mode 100644 index 0000000..b346356 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/script.js @@ -0,0 +1,30 @@ +function show(type) +{ + count = 0; + for (var key in methods) { + var row = document.getElementById(key); + if ((methods[key] & type) != 0) { + row.style.display = ''; + row.className = (count++ % 2) ? rowColor : altColor; + } + else + row.style.display = 'none'; + } + updateTabs(type); +} + +function updateTabs(type) +{ + for (var value in tabs) { + var sNode = document.getElementById(tabs[value][0]); + var spanNode = sNode.firstChild; + if (value == type) { + sNode.className = activeTableTab; + spanNode.innerHTML = tabs[value][1]; + } + else { + sNode.className = tableTab; + spanNode.innerHTML = "" + tabs[value][1] + ""; + } + } +} diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/serialized-form.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/serialized-form.html new file mode 100644 index 0000000..2d67696 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/serialized-form.html @@ -0,0 +1,151 @@ + + + + + + +Copyright © 2002–2019 The Apache Software Foundation. All rights reserved.
+ + diff --git a/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/src-html/org/apache/commons/codec/BinaryDecoder.html b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/src-html/org/apache/commons/codec/BinaryDecoder.html new file mode 100644 index 0000000..969ad74 --- /dev/null +++ b/trabalho_implementacao_01/bibliotecas/commons-codec-1.12/apidocs/src-html/org/apache/commons/codec/BinaryDecoder.html @@ -0,0 +1,110 @@ + + + +001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec; +019 +020/** +021 * Defines common decoding methods for byte array decoders. +022 * +023 * @version $Id$ +024 */ +025public interface BinaryDecoder extends Decoder { +026 +027 /** +028 * Decodes a byte array and returns the results as a byte array. +029 * +030 * @param source +031 * A byte array which has been encoded with the appropriate encoder +032 * @return a byte array that contains decoded content +033 * @throws DecoderException +034 * A decoder exception is thrown if a Decoder encounters a failure condition during the decode process. 
+035 */ +036 byte[] decode(byte[] source) throws DecoderException; +037} +038 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec; +019 +020/** +021 * Defines common encoding methods for byte array encoders. +022 * +023 * @version $Id$ +024 */ +025public interface BinaryEncoder extends Encoder { +026 +027 /** +028 * Encodes a byte array and return the encoded data as a byte array. +029 * +030 * @param source +031 * Data to be encoded +032 * @return A byte array containing the encoded data +033 * @throws EncoderException +034 * thrown if the Encoder encounters a failure condition during the encoding process. +035 */ +036 byte[] encode(byte[] source) throws EncoderException; +037} +038 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Character encoding names required of every implementation of the Java platform.
 *
 * From the Java documentation <a
 * href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the [lang] project. Even if a similar interface is defined in [lang], it is not
 * foreseen that [codec] would be made to depend on [lang].
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.4
 * @version $Id$
 */
public class CharEncoding {
    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String ISO_8859_1 = "ISO-8859-1";

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String US_ASCII = "US-ASCII";

    /**
     * Sixteen-bit Unicode Transformation Format, the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16 = "UTF-16";

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16BE = "UTF-16BE";

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_16LE = "UTF-16LE";

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     *
     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final String UTF_8 = "UTF-8";
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.codec;

import java.nio.charset.Charset;

/**
 * Charsets required of every implementation of the Java platform.
 *
 * From the Java documentation <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard
 * charsets</a>:
 * <p>
 * <cite>Every implementation of the Java platform is required to support the following character encodings. Consult the
 * release documentation for your implementation to see if any other encodings are supported.</cite>
 * </p>
 *
 * <ul>
 * <li><code>US-ASCII</code><br>
 * Seven-bit ASCII, a.k.a. ISO646-US, a.k.a. the Basic Latin block of the Unicode character set.</li>
 * <li><code>ISO-8859-1</code><br>
 * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.</li>
 * <li><code>UTF-8</code><br>
 * Eight-bit Unicode Transformation Format.</li>
 * <li><code>UTF-16BE</code><br>
 * Sixteen-bit Unicode Transformation Format, big-endian byte order.</li>
 * <li><code>UTF-16LE</code><br>
 * Sixteen-bit Unicode Transformation Format, little-endian byte order.</li>
 * <li><code>UTF-16</code><br>
 * Sixteen-bit Unicode Transformation Format, byte order specified by a mandatory initial byte-order mark (either order
 * accepted on input, big-endian used on output.)</li>
 * </ul>
 *
 * This perhaps would best belong in the Commons Lang project. Even if a similar class is defined in Commons Lang, it is
 * not foreseen that Commons Codec would be made to depend on Commons Lang.
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @since 1.7
 * @version $Id: CharEncoding.java 1173287 2011-09-20 18:16:19Z ggregory $
 */
public class Charsets {

    //
    // This class should only contain Charset instances for required encodings. This guarantees that it will load
    // correctly and without delay on all Java platforms.
    //

    /**
     * Returns the given Charset or the default Charset if the given Charset is null.
     *
     * @param charset
     *            A charset or null.
     * @return the given Charset or the default Charset if the given Charset is null
     */
    public static Charset toCharset(final Charset charset) {
        return charset == null ? Charset.defaultCharset() : charset;
    }

    /**
     * Returns a Charset for the named charset. If the name is null, return the default Charset.
     *
     * @param charset
     *            The name of the requested charset, may be null.
     * @return a Charset for the named charset
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     */
    public static Charset toCharset(final String charset) {
        return charset == null ? Charset.defaultCharset() : Charset.forName(charset);
    }

    /**
     * ISO Latin Alphabet No. 1, a.k.a. ISO-LATIN-1.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#ISO_8859_1} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset ISO_8859_1 = Charset.forName(CharEncoding.ISO_8859_1);

    /**
     * Seven-bit ASCII, also known as ISO646-US, also known as the Basic Latin block of the Unicode character set.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#US_ASCII} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset US_ASCII = Charset.forName(CharEncoding.US_ASCII);

    /**
     * Sixteen-bit Unicode Transformation Format, the byte order specified by a mandatory initial byte-order mark
     * (either order accepted on input, big-endian used on output).
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16 = Charset.forName(CharEncoding.UTF_16);

    /**
     * Sixteen-bit Unicode Transformation Format, big-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16BE} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16BE = Charset.forName(CharEncoding.UTF_16BE);

    /**
     * Sixteen-bit Unicode Transformation Format, little-endian byte order.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_16LE} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_16LE = Charset.forName(CharEncoding.UTF_16LE);

    /**
     * Eight-bit Unicode Transformation Format.
     * <p>
     * Every implementation of the Java platform is required to support this character encoding.
     * </p>
     * <p>
     * On Java 7 or later, use {@link java.nio.charset.StandardCharsets#UTF_8} instead.
     * </p>
     *
     * @see <a href="http://docs.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    public static final Charset UTF_8 = Charset.forName(CharEncoding.UTF_8);
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Provides the highest level of abstraction for Decoders.
 * <p>
 * This is the sister interface of {@link Encoder}. All Decoders implement this common generic interface.
 * Allows a user to pass a generic Object to any Decoder implementation in the codec package.
 * </p>
 * <p>
 * One of the two interfaces at the center of the codec package.
 * </p>
 *
 * @version $Id$
 */
public interface Decoder {

    /**
     * Decodes an "encoded" Object and returns a "decoded" Object. Note that the implementation of this interface will
     * try to cast the Object parameter to the specific type expected by a particular Decoder implementation. If a
     * {@link ClassCastException} occurs this decode method will throw a DecoderException.
     *
     * @param source
     *            the object to decode
     * @return a "decoded" object
     * @throws DecoderException
     *             a decoder exception can be thrown for any number of reasons. Some good candidates are that the
     *             parameter passed to this method is null, or that the parameter cannot be cast to the appropriate
     *             type for a specific decoder.
     */
    Object decode(Object source) throws DecoderException;
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Thrown when there is a failure condition during the decoding process. This exception is thrown when a {@link Decoder}
 * encounters a decoding-specific exception such as invalid data, or characters outside of the expected range.
 * <p>
 * This is a checked exception: it extends {@link Exception} directly and every constructor simply delegates to the
 * corresponding superclass constructor.
 * </p>
 *
 * @version $Id$
 */
public class DecoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Constructs a new exception with <code>null</code> as its detail message. The cause is not initialized, and may
     * subsequently be initialized by a call to {@link #initCause}.
     *
     * @since 1.4
     */
    public DecoderException() {
        super();
    }

    /**
     * Constructs a new exception with the specified detail message. The cause is not initialized, and may subsequently
     * be initialized by a call to {@link #initCause}.
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     */
    public DecoderException(final String message) {
        super(message);
    }

    /**
     * Constructs a new exception with the specified detail message and cause.
     * <p>
     * Note that the detail message associated with <code>cause</code> is not automatically incorporated into this
     * exception's detail message.
     * </p>
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a new exception with the specified cause and a detail message of <code>(cause==null ?
     * null : cause.toString())</code> (which typically contains the class and detail message of <code>cause</code>).
     * This constructor is useful for exceptions that are little more than wrappers for other throwables.
     *
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public DecoderException(final Throwable cause) {
        super(cause);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Provides the highest level of abstraction for Encoders.
 * <p>
 * This is the sister interface of {@link Decoder}. Every implementation of Encoder provides this
 * common generic interface which allows a user to pass a generic Object to any Encoder implementation
 * in the codec package.
 * </p>
 *
 * @version $Id$
 */
public interface Encoder {

    /**
     * Encodes an "Object" and returns the encoded content as an Object. The Objects here may just be
     * <code>byte[]</code> or <code>String</code>s depending on the implementation used.
     *
     * @param source
     *            An object to encode
     * @return An "encoded" Object
     * @throws EncoderException
     *             An encoder exception is thrown if the encoder experiences a failure condition during the encoding
     *             process.
     */
    Object encode(Object source) throws EncoderException;
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Thrown when there is a failure condition during the encoding process. This exception is thrown when an
 * {@link Encoder} encounters an encoding-specific exception such as invalid data, inability to calculate a checksum,
 * or characters outside of the expected range.
 * <p>
 * This is a checked exception: it extends {@link Exception} directly and every constructor simply delegates to the
 * corresponding superclass constructor.
 * </p>
 *
 * @version $Id$
 */
public class EncoderException extends Exception {

    /**
     * Declares the Serial Version Uid.
     *
     * @see <a href="http://c2.com/cgi/wiki?AlwaysDeclareSerialVersionUid">Always Declare Serial Version Uid</a>
     */
    private static final long serialVersionUID = 1L;

    /**
     * Constructs a new exception with <code>null</code> as its detail message. The cause is not initialized, and may
     * subsequently be initialized by a call to {@link #initCause}.
     *
     * @since 1.4
     */
    public EncoderException() {
        super();
    }

    /**
     * Constructs a new exception with the specified detail message. The cause is not initialized, and may subsequently
     * be initialized by a call to {@link #initCause}.
     *
     * @param message
     *            A useful message relating to the encoder-specific error; it is saved for later retrieval by the
     *            {@link #getMessage()} method.
     */
    public EncoderException(final String message) {
        super(message);
    }

    /**
     * Constructs a new exception with the specified detail message and cause.
     *
     * <p>
     * Note that the detail message associated with <code>cause</code> is not automatically incorporated into this
     * exception's detail message.
     * </p>
     *
     * @param message
     *            The detail message which is saved for later retrieval by the {@link #getMessage()} method.
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final String message, final Throwable cause) {
        super(message, cause);
    }

    /**
     * Constructs a new exception with the specified cause and a detail message of <code>(cause==null ?
     * null : cause.toString())</code> (which typically contains the class and detail message of <code>cause</code>).
     * This constructor is useful for exceptions that are little more than wrappers for other throwables.
     *
     * @param cause
     *            The cause which is saved for later retrieval by the {@link #getCause()} method. A <code>null</code>
     *            value is permitted, and indicates that the cause is nonexistent or unknown.
     * @since 1.4
     */
    public EncoderException(final Throwable cause) {
        super(cause);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Defines common decoding methods for String decoders.
 * <p>
 * Extends {@link Decoder} with a strongly typed <code>String</code> to <code>String</code> overload.
 * </p>
 *
 * @version $Id$
 */
public interface StringDecoder extends Decoder {

    /**
     * Decodes a String and returns a String.
     *
     * @param source
     *            the String to decode
     * @return the decoded String
     * @throws DecoderException
     *             thrown if there is an error condition during the decoding process.
     */
    String decode(String source) throws DecoderException;
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

/**
 * Defines common encoding methods for String encoders.
 * <p>
 * Extends {@link Encoder} with a strongly typed <code>String</code> to <code>String</code> overload.
 * </p>
 *
 * @version $Id$
 */
public interface StringEncoder extends Encoder {

    /**
     * Encodes a String and returns the encoded result as a String.
     *
     * @param source
     *            the String to encode
     * @return the encoded String
     * @throws EncoderException
     *             thrown if there is an error condition during the encoding process.
     */
    String encode(String source) throws EncoderException;
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec;

import java.util.Comparator;

/**
 * Orders objects by the result of running them through a {@link StringEncoder} rather than by their own value. This
 * is useful for sorting Strings by an encoded form such as Soundex or Metaphone.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @version $Id$
 */
@SuppressWarnings("rawtypes")
// TODO ought to implement Comparator<String> but that's not possible whilst maintaining binary compatibility.
public class StringEncoderComparator implements Comparator {

    /**
     * Encoder applied to both operands before they are compared. It is <code>null</code> only when the deprecated
     * no-argument constructor was used.
     */
    private final StringEncoder stringEncoder;

    /**
     * Constructs a new instance without an encoder.
     *
     * @deprecated Creating an instance without a {@link StringEncoder} leads to a {@link NullPointerException}. Will be
     *             removed in 2.0.
     */
    @Deprecated
    public StringEncoderComparator() {
        this(null); // Trying to use the resulting instance will cause things to break
    }

    /**
     * Constructs a new instance that compares using the given algorithm.
     *
     * @param stringEncoder
     *            the StringEncoder used for comparisons.
     */
    public StringEncoderComparator(final StringEncoder stringEncoder) {
        this.stringEncoder = stringEncoder;
    }

    /**
     * Encodes both arguments with the configured StringEncoder and compares the encoded forms, not the arguments
     * themselves.
     *
     * If an {@link EncoderException} is raised while encoding, the arguments are reported as equal (<code>0</code>).
     *
     * @param o1
     *            the object to compare
     * @param o2
     *            the object to compare to
     * @return the Comparable.compareTo() return code or 0 if an encoding error was caught.
     * @see Comparable
     */
    @Override
    public int compare(final Object o1, final Object o2) {
        try {
            // The casts may fail with a ClassCastException if encode returns something that is not Comparable;
            // that has always been the behaviour of this method.
            @SuppressWarnings("unchecked")
            final Comparable<Comparable<?>> encodedLeft = (Comparable<Comparable<?>>) this.stringEncoder.encode(o1);
            final Comparable<?> encodedRight = (Comparable<?>) this.stringEncoder.encode(o2);
            return encodedLeft.compareTo(encodedRight);
        } catch (final EncoderException ignored) {
            // Documented contract: an encoding failure makes the two operands compare as equal.
            return 0;
        }
    }

}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.binary; +019 +020/** +021 * Provides Base32 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>. +022 * +023 * <p> +024 * The class can be parameterized in the following manner with various constructors: +025 * </p> +026 * <ul> +027 * <li>Whether to use the "base32hex" variant instead of the default "base32"</li> +028 * <li>Line length: Default 76. Line length that aren't multiples of 8 will still essentially end up being multiples of +029 * 8 in the encoded data. +030 * <li>Line separator: Default is CRLF ("\r\n")</li> +031 * </ul> +032 * <p> +033 * This class operates directly on byte streams, and not character streams. +034 * </p> +035 * <p> +036 * This class is thread-safe. +037 * </p> +038 * +039 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a> +040 * +041 * @since 1.5 +042 * @version $Id$ +043 */ +044public class Base32 extends BaseNCodec { +045 +046 /** +047 * BASE32 characters are 5 bits in length. 
+048 * They are formed by taking a block of five octets to form a 40-bit string, +049 * which is converted into eight BASE32 characters. +050 */ +051 private static final int BITS_PER_ENCODED_BYTE = 5; +052 private static final int BYTES_PER_ENCODED_BLOCK = 8; +053 private static final int BYTES_PER_UNENCODED_BLOCK = 5; +054 +055 /** +056 * Chunk separator per RFC 2045 section 2.1. +057 * +058 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> +059 */ +060 private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; +061 +062 /** +063 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Alphabet" (as specified +064 * in Table 3 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the Base32 +065 * alphabet but fall within the bounds of the array are translated to -1. +066 */ +067 private static final byte[] DECODE_TABLE = { +068 // 0 1 2 3 4 5 6 7 8 9 A B C D E F +069 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f +070 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f +071 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f +072 -1, -1, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 +073 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O +074 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 50-5a P-Z +075 -1, -1, -1, -1, -1, // 5b - 5f +076 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 60 - 6f a-o +077 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 70 - 7a p-z/**/ +078 }; +079 +080 /** +081 * This array is a lookup table that translates 5-bit positive integer index values into their "Base32 Alphabet" +082 * equivalents as specified in Table 3 of RFC 4648. 
+083 */ +084 private static final byte[] ENCODE_TABLE = { +085 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', +086 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +087 '2', '3', '4', '5', '6', '7', +088 }; +089 +090 /** +091 * This array is a lookup table that translates Unicode characters drawn from the "Base32 Hex Alphabet" (as +092 * specified in Table 4 of RFC 4648) into their 5-bit positive integer equivalents. Characters that are not in the +093 * Base32 Hex alphabet but fall within the bounds of the array are translated to -1. +094 */ +095 private static final byte[] HEX_DECODE_TABLE = { +096 // 0 1 2 3 4 5 6 7 8 9 A B C D E F +097 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f +098 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f +099 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f +100 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 2-7 +101 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 40-4f A-O +102 25, 26, 27, 28, 29, 30, 31, // 50-56 P-V +103 -1, -1, -1, -1, -1, -1, -1, -1, -1, // 57-5f Z-_ +104 -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, // 60-6f `-o +105 25, 26, 27, 28, 29, 30, 31 // 70-76 p-v +106 }; +107 +108 /** +109 * This array is a lookup table that translates 5-bit positive integer index values into their +110 * "Base32 Hex Alphabet" equivalents as specified in Table 4 of RFC 4648. +111 */ +112 private static final byte[] HEX_ENCODE_TABLE = { +113 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +114 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', +115 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', +116 }; +117 +118 /** Mask used to extract 5 bits, used when encoding Base32 bytes */ +119 private static final int MASK_5BITS = 0x1f; +120 +121 // The static final fields above are used for the original static byte[] methods on Base32. 
+122 // The private member fields below are used with the new streaming approach, which requires +123 // some state be preserved between calls of encode() and decode(). +124 +125 /** +126 * Place holder for the bytes we're dealing with for our based logic. +127 * Bitwise operations store and extract the encoding or decoding from this variable. +128 */ +129 +130 /** +131 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. +132 * <code>decodeSize = {@link #BYTES_PER_ENCODED_BLOCK} - 1 + lineSeparator.length;</code> +133 */ +134 private final int decodeSize; +135 +136 /** +137 * Decode table to use. +138 */ +139 private final byte[] decodeTable; +140 +141 /** +142 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. +143 * <code>encodeSize = {@link #BYTES_PER_ENCODED_BLOCK} + lineSeparator.length;</code> +144 */ +145 private final int encodeSize; +146 +147 /** +148 * Encode table to use. +149 */ +150 private final byte[] encodeTable; +151 +152 /** +153 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. +154 */ +155 private final byte[] lineSeparator; +156 +157 /** +158 * Creates a Base32 codec used for decoding and encoding. +159 * <p> +160 * When encoding the line length is 0 (no chunking). +161 * </p> +162 * +163 */ +164 public Base32() { +165 this(false); +166 } +167 +168 /** +169 * Creates a Base32 codec used for decoding and encoding. +170 * <p> +171 * When encoding the line length is 0 (no chunking). +172 * </p> +173 * @param pad byte used as padding byte. +174 */ +175 public Base32(final byte pad) { +176 this(false, pad); +177 } +178 +179 /** +180 * Creates a Base32 codec used for decoding and encoding. +181 * <p> +182 * When encoding the line length is 0 (no chunking). 
+183 * </p> +184 * @param useHex if {@code true} then use Base32 Hex alphabet +185 */ +186 public Base32(final boolean useHex) { +187 this(0, null, useHex, PAD_DEFAULT); +188 } +189 +190 /** +191 * Creates a Base32 codec used for decoding and encoding. +192 * <p> +193 * When encoding the line length is 0 (no chunking). +194 * </p> +195 * @param useHex if {@code true} then use Base32 Hex alphabet +196 * @param pad byte used as padding byte. +197 */ +198 public Base32(final boolean useHex, final byte pad) { +199 this(0, null, useHex, pad); +200 } +201 +202 /** +203 * Creates a Base32 codec used for decoding and encoding. +204 * <p> +205 * When encoding the line length is given in the constructor, the line separator is CRLF. +206 * </p> +207 * +208 * @param lineLength +209 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +210 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +211 * decoding. +212 */ +213 public Base32(final int lineLength) { +214 this(lineLength, CHUNK_SEPARATOR); +215 } +216 +217 /** +218 * Creates a Base32 codec used for decoding and encoding. +219 * <p> +220 * When encoding the line length and line separator are given in the constructor. +221 * </p> +222 * <p> +223 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +224 * </p> +225 * +226 * @param lineLength +227 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +228 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +229 * decoding. +230 * @param lineSeparator +231 * Each line of encoded data will end with this sequence of bytes. +232 * @throws IllegalArgumentException +233 * The provided lineSeparator included some Base32 characters. That's not going to work! 
+234 */ +235 public Base32(final int lineLength, final byte[] lineSeparator) { +236 this(lineLength, lineSeparator, false, PAD_DEFAULT); +237 } +238 +239 /** +240 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. +241 * <p> +242 * When encoding the line length and line separator are given in the constructor. +243 * </p> +244 * <p> +245 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +246 * </p> +247 * +248 * @param lineLength +249 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +250 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +251 * decoding. +252 * @param lineSeparator +253 * Each line of encoded data will end with this sequence of bytes. +254 * @param useHex +255 * if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet +256 * @throws IllegalArgumentException +257 * The provided lineSeparator included some Base32 characters. That's not going to work! Or the +258 * lineLength > 0 and lineSeparator is null. +259 */ +260 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex) { +261 this(lineLength, lineSeparator, useHex, PAD_DEFAULT); +262 } +263 +264 /** +265 * Creates a Base32 / Base32 Hex codec used for decoding and encoding. +266 * <p> +267 * When encoding the line length and line separator are given in the constructor. +268 * </p> +269 * <p> +270 * Line lengths that aren't multiples of 8 will still essentially end up being multiples of 8 in the encoded data. +271 * </p> +272 * +273 * @param lineLength +274 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +275 * 8). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +276 * decoding. +277 * @param lineSeparator +278 * Each line of encoded data will end with this sequence of bytes. 
+279 * @param useHex +280 * if {@code true}, then use Base32 Hex alphabet, otherwise use Base32 alphabet +281 * @param pad byte used as padding byte. +282 * @throws IllegalArgumentException +283 * The provided lineSeparator included some Base32 characters. That's not going to work! Or the +284 * lineLength > 0 and lineSeparator is null. +285 */ +286 public Base32(final int lineLength, final byte[] lineSeparator, final boolean useHex, final byte pad) { +287 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, lineLength, +288 lineSeparator == null ? 0 : lineSeparator.length, pad); +289 if (useHex) { +290 this.encodeTable = HEX_ENCODE_TABLE; +291 this.decodeTable = HEX_DECODE_TABLE; +292 } else { +293 this.encodeTable = ENCODE_TABLE; +294 this.decodeTable = DECODE_TABLE; +295 } +296 if (lineLength > 0) { +297 if (lineSeparator == null) { +298 throw new IllegalArgumentException("lineLength " + lineLength + " > 0, but lineSeparator is null"); +299 } +300 // Must be done after initializing the tables +301 if (containsAlphabetOrPad(lineSeparator)) { +302 final String sep = StringUtils.newStringUtf8(lineSeparator); +303 throw new IllegalArgumentException("lineSeparator must not contain Base32 characters: [" + sep + "]"); +304 } +305 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; +306 this.lineSeparator = new byte[lineSeparator.length]; +307 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); +308 } else { +309 this.encodeSize = BYTES_PER_ENCODED_BLOCK; +310 this.lineSeparator = null; +311 } +312 this.decodeSize = this.encodeSize - 1; +313 +314 if (isInAlphabet(pad) || isWhiteSpace(pad)) { +315 throw new IllegalArgumentException("pad must not be in alphabet or whitespace"); +316 } +317 } +318 +319 /** +320 * <p> +321 * Decodes all of the provided data, starting at inPos, for inAvail bytes. 
Should be called at least twice: once +322 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" +323 * call is not necessary when decoding, but it doesn't hurt, either. +324 * </p> +325 * <p> +326 * Ignores all non-Base32 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are +327 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, +328 * garbage-out philosophy: it will not check the provided data for validity. +329 * </p> +330 * +331 * @param in +332 * byte[] array of ascii data to Base32 decode. +333 * @param inPos +334 * Position to start reading data from. +335 * @param inAvail +336 * Amount of bytes available from input for encoding. +337 * @param context the context to be used +338 * +339 * Output is written to {@link Context#buffer} as 8-bit octets, using {@link Context#pos} as the buffer position +340 */ +341 @Override +342 void decode(final byte[] in, int inPos, final int inAvail, final Context context) { +343 // package protected for access from I/O streams +344 +345 if (context.eof) { +346 return; +347 } +348 if (inAvail < 0) { +349 context.eof = true; +350 } +351 for (int i = 0; i < inAvail; i++) { +352 final byte b = in[inPos++]; +353 if (b == pad) { +354 // We're done. 
+355 context.eof = true; +356 break; +357 } +358 final byte[] buffer = ensureBufferSize(decodeSize, context); +359 if (b >= 0 && b < this.decodeTable.length) { +360 final int result = this.decodeTable[b]; +361 if (result >= 0) { +362 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; +363 // collect decoded bytes +364 context.lbitWorkArea = (context.lbitWorkArea << BITS_PER_ENCODED_BYTE) + result; +365 if (context.modulus == 0) { // we can output the 5 bytes +366 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 32) & MASK_8BITS); +367 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); +368 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); +369 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); +370 buffer[context.pos++] = (byte) (context.lbitWorkArea & MASK_8BITS); +371 } +372 } +373 } +374 } +375 +376 // Two forms of EOF as far as Base32 decoder is concerned: actual +377 // EOF (-1) and first time '=' character is encountered in stream. +378 // This approach makes the '=' padding characters completely optional. +379 if (context.eof && context.modulus >= 2) { // if modulus < 2, nothing to do +380 final byte[] buffer = ensureBufferSize(decodeSize, context); +381 +382 // we ignore partial bytes, i.e. 
only multiples of 8 count +383 switch (context.modulus) { +384 case 2 : // 10 bits, drop 2 and output one byte +385 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 2) & MASK_8BITS); +386 break; +387 case 3 : // 15 bits, drop 7 and output 1 byte +388 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 7) & MASK_8BITS); +389 break; +390 case 4 : // 20 bits = 2*8 + 4 +391 context.lbitWorkArea = context.lbitWorkArea >> 4; // drop 4 bits +392 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); +393 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); +394 break; +395 case 5 : // 25bits = 3*8 + 1 +396 context.lbitWorkArea = context.lbitWorkArea >> 1; +397 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); +398 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); +399 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); +400 break; +401 case 6 : // 30bits = 3*8 + 6 +402 context.lbitWorkArea = context.lbitWorkArea >> 6; +403 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); +404 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); +405 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); +406 break; +407 case 7 : // 35 = 4*8 +3 +408 context.lbitWorkArea = context.lbitWorkArea >> 3; +409 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 24) & MASK_8BITS); +410 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 16) & MASK_8BITS); +411 buffer[context.pos++] = (byte) ((context.lbitWorkArea >> 8) & MASK_8BITS); +412 buffer[context.pos++] = (byte) ((context.lbitWorkArea) & MASK_8BITS); +413 break; +414 default: +415 // modulus can be 0-7, and we excluded 0,1 already +416 throw new IllegalStateException("Impossible modulus "+context.modulus); +417 } +418 } +419 } +420 +421 /** +422 * <p> +423 * Encodes all of the provided data, starting at inPos, for inAvail bytes. 
Must be called at least twice: once with +424 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, so flush last +425 * remaining bytes (if not multiple of 5). +426 * </p> +427 * +428 * @param in +429 * byte[] array of binary data to Base32 encode. +430 * @param inPos +431 * Position to start reading data from. +432 * @param inAvail +433 * Amount of bytes available from input for encoding. +434 * @param context the context to be used +435 */ +436 @Override +437 void encode(final byte[] in, int inPos, final int inAvail, final Context context) { +438 // package protected for access from I/O streams +439 +440 if (context.eof) { +441 return; +442 } +443 // inAvail < 0 is how we're informed of EOF in the underlying data we're +444 // encoding. +445 if (inAvail < 0) { +446 context.eof = true; +447 if (0 == context.modulus && lineLength == 0) { +448 return; // no leftovers to process and not using chunking +449 } +450 final byte[] buffer = ensureBufferSize(encodeSize, context); +451 final int savedPos = context.pos; +452 switch (context.modulus) { // % 5 +453 case 0 : +454 break; +455 case 1 : // Only 1 octet; take top 5 bits then remainder +456 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 3) & MASK_5BITS]; // 8-1*5 = 3 +457 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 2) & MASK_5BITS]; // 5-3=2 +458 buffer[context.pos++] = pad; +459 buffer[context.pos++] = pad; +460 buffer[context.pos++] = pad; +461 buffer[context.pos++] = pad; +462 buffer[context.pos++] = pad; +463 buffer[context.pos++] = pad; +464 break; +465 case 2 : // 2 octets = 16 bits to use +466 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 11) & MASK_5BITS]; // 16-1*5 = 11 +467 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 6) & MASK_5BITS]; // 16-2*5 = 6 +468 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 1) & MASK_5BITS]; // 16-3*5 = 1 +469 buffer[context.pos++] = 
encodeTable[(int)(context.lbitWorkArea << 4) & MASK_5BITS]; // 5-1 = 4 +470 buffer[context.pos++] = pad; +471 buffer[context.pos++] = pad; +472 buffer[context.pos++] = pad; +473 buffer[context.pos++] = pad; +474 break; +475 case 3 : // 3 octets = 24 bits to use +476 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 19) & MASK_5BITS]; // 24-1*5 = 19 +477 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 14) & MASK_5BITS]; // 24-2*5 = 14 +478 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 9) & MASK_5BITS]; // 24-3*5 = 9 +479 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 4) & MASK_5BITS]; // 24-4*5 = 4 +480 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 1) & MASK_5BITS]; // 5-4 = 1 +481 buffer[context.pos++] = pad; +482 buffer[context.pos++] = pad; +483 buffer[context.pos++] = pad; +484 break; +485 case 4 : // 4 octets = 32 bits to use +486 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 27) & MASK_5BITS]; // 32-1*5 = 27 +487 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 22) & MASK_5BITS]; // 32-2*5 = 22 +488 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 17) & MASK_5BITS]; // 32-3*5 = 17 +489 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 12) & MASK_5BITS]; // 32-4*5 = 12 +490 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 7) & MASK_5BITS]; // 32-5*5 = 7 +491 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 2) & MASK_5BITS]; // 32-6*5 = 2 +492 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea << 3) & MASK_5BITS]; // 5-2 = 3 +493 buffer[context.pos++] = pad; +494 break; +495 default: +496 throw new IllegalStateException("Impossible modulus "+context.modulus); +497 } +498 context.currentLinePos += context.pos - savedPos; // keep track of current line position +499 // if currentPos == 0 we are at the start of a line, so don't add CRLF +500 if (lineLength > 0 
&& context.currentLinePos > 0){ // add chunk separator if required +501 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); +502 context.pos += lineSeparator.length; +503 } +504 } else { +505 for (int i = 0; i < inAvail; i++) { +506 final byte[] buffer = ensureBufferSize(encodeSize, context); +507 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; +508 int b = in[inPos++]; +509 if (b < 0) { +510 b += 256; +511 } +512 context.lbitWorkArea = (context.lbitWorkArea << 8) + b; // BITS_PER_BYTE +513 if (0 == context.modulus) { // we have enough bytes to create our output +514 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 35) & MASK_5BITS]; +515 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 30) & MASK_5BITS]; +516 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 25) & MASK_5BITS]; +517 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 20) & MASK_5BITS]; +518 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 15) & MASK_5BITS]; +519 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 10) & MASK_5BITS]; +520 buffer[context.pos++] = encodeTable[(int)(context.lbitWorkArea >> 5) & MASK_5BITS]; +521 buffer[context.pos++] = encodeTable[(int)context.lbitWorkArea & MASK_5BITS]; +522 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; +523 if (lineLength > 0 && lineLength <= context.currentLinePos) { +524 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); +525 context.pos += lineSeparator.length; +526 context.currentLinePos = 0; +527 } +528 } +529 } +530 } +531 } +532 +533 /** +534 * Returns whether or not the {@code octet} is in the Base32 alphabet. +535 * +536 * @param octet +537 * The value to test +538 * @return {@code true} if the value is defined in the the Base32 alphabet {@code false} otherwise. 
+539 */ +540 @Override +541 public boolean isInAlphabet(final byte octet) { +542 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; +543 } +544} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.InputStream;

/**
 * Input stream that Base32-decodes (default) or Base32-encodes the data read through it, in a streaming
 * fashion (unlimited size).
 * <p>
 * Unless a line length and line separator are passed to the four-argument constructor, the stream is
 * backed by a {@link Base32} codec created without chunking.
 * </p>
 * <p>
 * The default behaviour of the Base32InputStream is to DECODE, whereas the default behaviour of the
 * Base32OutputStream is to ENCODE, but this behaviour can be overridden by using a different constructor.
 * </p>
 * <p>
 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1,
 * Windows-1252, UTF-8, etc).
 * </p>
 *
 * @version $Id$
 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
 * @since 1.5
 */
public class Base32InputStream extends BaseNCodecInputStream {

    /**
     * Wraps {@code in} so that everything read from this stream is the Base32-decoded form of the
     * wrapped data.
     *
     * @param in
     *            InputStream to wrap.
     */
    public Base32InputStream(final InputStream in) {
        super(in, new Base32(), false);
    }

    /**
     * Wraps {@code in} so that everything read from this stream is either the Base32-encoded or the
     * Base32-decoded form of the wrapped data.
     *
     * @param in
     *            InputStream to wrap.
     * @param doEncode
     *            true if we should encode all data read from us, false if we should decode.
     */
    public Base32InputStream(final InputStream in, final boolean doEncode) {
        super(in, new Base32(), doEncode);
    }

    /**
     * Wraps {@code in} so that everything read from this stream is either the Base32-encoded or the
     * Base32-decoded form of the wrapped data, chunking encoded output into lines.
     *
     * @param in
     *            InputStream to wrap.
     * @param doEncode
     *            true if we should encode all data read from us, false if we should decode.
     * @param lineLength
     *            If doEncode is true, each line of encoded data will contain at most lineLength characters
     *            ({@link Base32} documents this as rounded down to the nearest multiple of 8). If
     *            {@code lineLength <= 0}, the encoded data is not divided into lines. If doEncode is false,
     *            lineLength is ignored.
     * @param lineSeparator
     *            If doEncode is true, each line of encoded data will be terminated with this byte sequence
     *            (e.g. \r\n). If {@code lineLength <= 0}, the lineSeparator is not used. If doEncode is
     *            false lineSeparator is ignored.
     */
    public Base32InputStream(final InputStream in, final boolean doEncode,
                             final int lineLength, final byte[] lineSeparator) {
        super(in, new Base32(lineLength, lineSeparator), doEncode);
    }

}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.OutputStream;

/**
 * Output stream that Base32-encodes (default) or Base32-decodes the data written through it, in a
 * streaming fashion (unlimited size).
 * <p>
 * Unless a line length and line separator are passed to the four-argument constructor, the stream is
 * backed by a {@link Base32} codec created without chunking.
 * </p>
 * <p>
 * The default behaviour of the Base32OutputStream is to ENCODE, whereas the default behaviour of the
 * Base32InputStream is to DECODE. But this behaviour can be overridden by using a different constructor.
 * </p>
 * <p>
 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only
 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1,
 * Windows-1252, UTF-8, etc).
 * </p>
 * <p>
 * <b>Note:</b> It is mandatory to close the stream after the last byte has been written to it, otherwise the
 * final padding will be omitted and the resulting data will be incomplete/inconsistent.
 * </p>
 *
 * @version $Id$
 * @see <a href="http://www.ietf.org/rfc/rfc4648.txt">RFC 4648</a>
 * @since 1.5
 */
public class Base32OutputStream extends BaseNCodecOutputStream {

    /**
     * Wraps {@code out} so that everything written to this stream reaches the wrapped stream
     * Base32-encoded.
     *
     * @param out
     *            OutputStream to wrap.
     */
    public Base32OutputStream(final OutputStream out) {
        super(out, new Base32(), true);
    }

    /**
     * Wraps {@code out} so that everything written to this stream reaches the wrapped stream either
     * Base32-encoded or Base32-decoded.
     *
     * @param out
     *            OutputStream to wrap.
     * @param doEncode
     *            true if we should encode all data written to us, false if we should decode.
     */
    public Base32OutputStream(final OutputStream out, final boolean doEncode) {
        super(out, new Base32(), doEncode);
    }

    /**
     * Wraps {@code out} so that everything written to this stream reaches the wrapped stream either
     * Base32-encoded or Base32-decoded, chunking encoded output into lines.
     *
     * @param out
     *            OutputStream to wrap.
     * @param doEncode
     *            true if we should encode all data written to us, false if we should decode.
     * @param lineLength
     *            If doEncode is true, each line of encoded data will contain at most lineLength characters
     *            ({@link Base32} documents this as rounded down to the nearest multiple of 8). If
     *            {@code lineLength <= 0}, the encoded data is not divided into lines. If doEncode is false,
     *            lineLength is ignored.
     * @param lineSeparator
     *            If doEncode is true, each line of encoded data will be terminated with this byte sequence
     *            (e.g. \r\n). If {@code lineLength <= 0}, the lineSeparator is not used. If doEncode is
     *            false lineSeparator is ignored.
     */
    public Base32OutputStream(final OutputStream out, final boolean doEncode,
                              final int lineLength, final byte[] lineSeparator) {
        super(out, new Base32(lineLength, lineSeparator), doEncode);
    }

}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.binary; +019 +020import java.math.BigInteger; +021 +022/** +023 * Provides Base64 encoding and decoding as defined by <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>. +024 * +025 * <p> +026 * This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> from RFC 2045 <cite>Multipurpose +027 * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein. +028 * </p> +029 * <p> +030 * The class can be parameterized in the following manner with various constructors: +031 * </p> +032 * <ul> +033 * <li>URL-safe mode: Default off.</li> +034 * <li>Line length: Default 76. Line length that aren't multiples of 4 will still essentially end up being multiples of +035 * 4 in the encoded data. +036 * <li>Line separator: Default is CRLF ("\r\n")</li> +037 * </ul> +038 * <p> +039 * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes. 
+040 * </p> +041 * <p> +042 * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only +043 * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, +044 * UTF-8, etc). +045 * </p> +046 * <p> +047 * This class is thread-safe. +048 * </p> +049 * +050 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a> +051 * @since 1.0 +052 * @version $Id$ +053 */ +054public class Base64 extends BaseNCodec { +055 +056 /** +057 * BASE32 characters are 6 bits in length. +058 * They are formed by taking a block of 3 octets to form a 24-bit string, +059 * which is converted into 4 BASE64 characters. +060 */ +061 private static final int BITS_PER_ENCODED_BYTE = 6; +062 private static final int BYTES_PER_UNENCODED_BLOCK = 3; +063 private static final int BYTES_PER_ENCODED_BLOCK = 4; +064 +065 /** +066 * Chunk separator per RFC 2045 section 2.1. +067 * +068 * <p> +069 * N.B. The next major release may break compatibility and make this field private. +070 * </p> +071 * +072 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a> +073 */ +074 static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; +075 +076 /** +077 * This array is a lookup table that translates 6-bit positive integer index values into their "Base64 Alphabet" +078 * equivalents as specified in Table 1 of RFC 2045. +079 * +080 * Thanks to "commons" project in ws.apache.org for this code. 
+081 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ +082 */ +083 private static final byte[] STANDARD_ENCODE_TABLE = { +084 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', +085 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +086 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', +087 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +088 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' +089 }; +090 +091 /** +092 * This is a copy of the STANDARD_ENCODE_TABLE above, but with + and / +093 * changed to - and _ to make the encoded Base64 results more URL-SAFE. +094 * This table is only used when the Base64's mode is set to URL-SAFE. +095 */ +096 private static final byte[] URL_SAFE_ENCODE_TABLE = { +097 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', +098 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', +099 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', +100 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', +101 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' +102 }; +103 +104 /** +105 * This array is a lookup table that translates Unicode characters drawn from the "Base64 Alphabet" (as specified +106 * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Base64 +107 * alphabet but fall within the bounds of the array are translated to -1. +108 * +109 * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both +110 * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). +111 * +112 * Thanks to "commons" project in ws.apache.org for this code. 
+113 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ +114 */ +115 private static final byte[] DECODE_TABLE = { +116 // 0 1 2 3 4 5 6 7 8 9 A B C D E F +117 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f +118 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f +119 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63, // 20-2f + - / +120 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 +121 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 40-4f A-O +122 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63, // 50-5f P-Z _ +123 -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 60-6f a-o +124 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 // 70-7a p-z +125 }; +126 +127 /** +128 * Base64 uses 6-bit fields. +129 */ +130 /** Mask used to extract 6 bits, used when encoding */ +131 private static final int MASK_6BITS = 0x3f; +132 +133 // The static final fields above are used for the original static byte[] methods on Base64. +134 // The private member fields below are used with the new streaming approach, which requires +135 // some state be preserved between calls of encode() and decode(). +136 +137 /** +138 * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able +139 * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch +140 * between the two modes. +141 */ +142 private final byte[] encodeTable; +143 +144 // Only one decode table currently; keep for consistency with Base32 code +145 private final byte[] decodeTable = DECODE_TABLE; +146 +147 /** +148 * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. +149 */ +150 private final byte[] lineSeparator; +151 +152 /** +153 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. 
+154 * <code>decodeSize = 3 + lineSeparator.length;</code> +155 */ +156 private final int decodeSize; +157 +158 /** +159 * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. +160 * <code>encodeSize = 4 + lineSeparator.length;</code> +161 */ +162 private final int encodeSize; +163 +164 /** +165 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. +166 * <p> +167 * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE. +168 * </p> +169 * +170 * <p> +171 * When decoding all variants are supported. +172 * </p> +173 */ +174 public Base64() { +175 this(0); +176 } +177 +178 /** +179 * Creates a Base64 codec used for decoding (all modes) and encoding in the given URL-safe mode. +180 * <p> +181 * When encoding the line length is 76, the line separator is CRLF, and the encoding table is STANDARD_ENCODE_TABLE. +182 * </p> +183 * +184 * <p> +185 * When decoding all variants are supported. +186 * </p> +187 * +188 * @param urlSafe +189 * if <code>true</code>, URL-safe encoding is used. In most cases this should be set to +190 * <code>false</code>. +191 * @since 1.4 +192 */ +193 public Base64(final boolean urlSafe) { +194 this(MIME_CHUNK_SIZE, CHUNK_SEPARATOR, urlSafe); +195 } +196 +197 /** +198 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. +199 * <p> +200 * When encoding the line length is given in the constructor, the line separator is CRLF, and the encoding table is +201 * STANDARD_ENCODE_TABLE. +202 * </p> +203 * <p> +204 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +205 * </p> +206 * <p> +207 * When decoding all variants are supported. +208 * </p> +209 * +210 * @param lineLength +211 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +212 * 4). 
If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +213 * decoding. +214 * @since 1.4 +215 */ +216 public Base64(final int lineLength) { +217 this(lineLength, CHUNK_SEPARATOR); +218 } +219 +220 /** +221 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. +222 * <p> +223 * When encoding the line length and line separator are given in the constructor, and the encoding table is +224 * STANDARD_ENCODE_TABLE. +225 * </p> +226 * <p> +227 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +228 * </p> +229 * <p> +230 * When decoding all variants are supported. +231 * </p> +232 * +233 * @param lineLength +234 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +235 * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +236 * decoding. +237 * @param lineSeparator +238 * Each line of encoded data will end with this sequence of bytes. +239 * @throws IllegalArgumentException +240 * Thrown when the provided lineSeparator included some base64 characters. +241 * @since 1.4 +242 */ +243 public Base64(final int lineLength, final byte[] lineSeparator) { +244 this(lineLength, lineSeparator, false); +245 } +246 +247 /** +248 * Creates a Base64 codec used for decoding (all modes) and encoding in URL-unsafe mode. +249 * <p> +250 * When encoding the line length and line separator are given in the constructor, and the encoding table is +251 * STANDARD_ENCODE_TABLE. +252 * </p> +253 * <p> +254 * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. +255 * </p> +256 * <p> +257 * When decoding all variants are supported. +258 * </p> +259 * +260 * @param lineLength +261 * Each line of encoded data will be at most of the given length (rounded down to nearest multiple of +262 * 4). 
If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when +263 * decoding. +264 * @param lineSeparator +265 * Each line of encoded data will end with this sequence of bytes. +266 * @param urlSafe +267 * Instead of emitting '+' and '/' we emit '-' and '_' respectively. urlSafe is only applied to encode +268 * operations. Decoding seamlessly handles both modes. +269 * <b>Note: no padding is added when using the URL-safe alphabet.</b> +270 * @throws IllegalArgumentException +271 * The provided lineSeparator included some base64 characters. That's not going to work! +272 * @since 1.4 +273 */ +274 public Base64(final int lineLength, final byte[] lineSeparator, final boolean urlSafe) { +275 super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, +276 lineLength, +277 lineSeparator == null ? 0 : lineSeparator.length); +278 // TODO could be simplified if there is no requirement to reject invalid line sep when length <=0 +279 // @see test case Base64Test.testConstructors() +280 if (lineSeparator != null) { +281 if (containsAlphabetOrPad(lineSeparator)) { +282 final String sep = StringUtils.newStringUtf8(lineSeparator); +283 throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]"); +284 } +285 if (lineLength > 0){ // null line-sep forces no chunking rather than throwing IAE +286 this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; +287 this.lineSeparator = new byte[lineSeparator.length]; +288 System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); +289 } else { +290 this.encodeSize = BYTES_PER_ENCODED_BLOCK; +291 this.lineSeparator = null; +292 } +293 } else { +294 this.encodeSize = BYTES_PER_ENCODED_BLOCK; +295 this.lineSeparator = null; +296 } +297 this.decodeSize = this.encodeSize - 1; +298 this.encodeTable = urlSafe ? URL_SAFE_ENCODE_TABLE : STANDARD_ENCODE_TABLE; +299 } +300 +301 /** +302 * Returns our current encode mode. 
True if we're URL-SAFE, false otherwise. +303 * +304 * @return true if we're in URL-SAFE mode, false otherwise. +305 * @since 1.4 +306 */ +307 public boolean isUrlSafe() { +308 return this.encodeTable == URL_SAFE_ENCODE_TABLE; +309 } +310 +311 /** +312 * <p> +313 * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with +314 * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last +315 * remaining bytes (if not multiple of 3). +316 * </p> +317 * <p><b>Note: no padding is added when encoding using the URL-safe alphabet.</b></p> +318 * <p> +319 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. +320 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ +321 * </p> +322 * +323 * @param in +324 * byte[] array of binary data to base64 encode. +325 * @param inPos +326 * Position to start reading data from. +327 * @param inAvail +328 * Amount of bytes available from input for encoding. +329 * @param context +330 * the context to be used +331 */ +332 @Override +333 void encode(final byte[] in, int inPos, final int inAvail, final Context context) { +334 if (context.eof) { +335 return; +336 } +337 // inAvail < 0 is how we're informed of EOF in the underlying data we're +338 // encoding. 
+339 if (inAvail < 0) { +340 context.eof = true; +341 if (0 == context.modulus && lineLength == 0) { +342 return; // no leftovers to process and not using chunking +343 } +344 final byte[] buffer = ensureBufferSize(encodeSize, context); +345 final int savedPos = context.pos; +346 switch (context.modulus) { // 0-2 +347 case 0 : // nothing to do here +348 break; +349 case 1 : // 8 bits = 6 + 2 +350 // top 6 bits: +351 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; +352 // remaining 2: +353 buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; +354 // URL-SAFE skips the padding to further reduce size. +355 if (encodeTable == STANDARD_ENCODE_TABLE) { +356 buffer[context.pos++] = pad; +357 buffer[context.pos++] = pad; +358 } +359 break; +360 +361 case 2 : // 16 bits = 6 + 6 + 4 +362 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS]; +363 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS]; +364 buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS]; +365 // URL-SAFE skips the padding to further reduce size. 
+366 if (encodeTable == STANDARD_ENCODE_TABLE) { +367 buffer[context.pos++] = pad; +368 } +369 break; +370 default: +371 throw new IllegalStateException("Impossible modulus "+context.modulus); +372 } +373 context.currentLinePos += context.pos - savedPos; // keep track of current line position +374 // if currentPos == 0 we are at the start of a line, so don't add CRLF +375 if (lineLength > 0 && context.currentLinePos > 0) { +376 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); +377 context.pos += lineSeparator.length; +378 } +379 } else { +380 for (int i = 0; i < inAvail; i++) { +381 final byte[] buffer = ensureBufferSize(encodeSize, context); +382 context.modulus = (context.modulus+1) % BYTES_PER_UNENCODED_BLOCK; +383 int b = in[inPos++]; +384 if (b < 0) { +385 b += 256; +386 } +387 context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE +388 if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract +389 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS]; +390 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS]; +391 buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS]; +392 buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; +393 context.currentLinePos += BYTES_PER_ENCODED_BLOCK; +394 if (lineLength > 0 && lineLength <= context.currentLinePos) { +395 System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); +396 context.pos += lineSeparator.length; +397 context.currentLinePos = 0; +398 } +399 } +400 } +401 } +402 } +403 +404 /** +405 * <p> +406 * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once +407 * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" +408 * call is not necessary when decoding, but it doesn't hurt, either. 
+409 * </p> +410 * <p> +411 * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are +412 * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, +413 * garbage-out philosophy: it will not check the provided data for validity. +414 * </p> +415 * <p> +416 * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. +417 * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ +418 * </p> +419 * +420 * @param in +421 * byte[] array of ascii data to base64 decode. +422 * @param inPos +423 * Position to start reading data from. +424 * @param inAvail +425 * Amount of bytes available from input for encoding. +426 * @param context +427 * the context to be used +428 */ +429 @Override +430 void decode(final byte[] in, int inPos, final int inAvail, final Context context) { +431 if (context.eof) { +432 return; +433 } +434 if (inAvail < 0) { +435 context.eof = true; +436 } +437 for (int i = 0; i < inAvail; i++) { +438 final byte[] buffer = ensureBufferSize(decodeSize, context); +439 final byte b = in[inPos++]; +440 if (b == pad) { +441 // We're done. +442 context.eof = true; +443 break; +444 } +445 if (b >= 0 && b < DECODE_TABLE.length) { +446 final int result = DECODE_TABLE[b]; +447 if (result >= 0) { +448 context.modulus = (context.modulus+1) % BYTES_PER_ENCODED_BLOCK; +449 context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; +450 if (context.modulus == 0) { +451 buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS); +452 buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); +453 buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); +454 } +455 } +456 } +457 } +458 +459 // Two forms of EOF as far as base64 decoder is concerned: actual +460 // EOF (-1) and first time '=' character is encountered in stream. 
+461 // This approach makes the '=' padding characters completely optional. +462 if (context.eof && context.modulus != 0) { +463 final byte[] buffer = ensureBufferSize(decodeSize, context); +464 +465 // We have some spare bits remaining +466 // Output all whole multiples of 8 bits and ignore the rest +467 switch (context.modulus) { +468// case 0 : // impossible, as excluded above +469 case 1 : // 6 bits - ignore entirely +470 // TODO not currently tested; perhaps it is impossible? +471 break; +472 case 2 : // 12 bits = 8 + 4 +473 context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits +474 buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); +475 break; +476 case 3 : // 18 bits = 8 + 8 + 2 +477 context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits +478 buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); +479 buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); +480 break; +481 default: +482 throw new IllegalStateException("Impossible modulus "+context.modulus); +483 } +484 } +485 } +486 +487 /** +488 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the +489 * method treats whitespace as valid. +490 * +491 * @param arrayOctet +492 * byte array to test +493 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; +494 * <code>false</code>, otherwise +495 * @deprecated 1.5 Use {@link #isBase64(byte[])}, will be removed in 2.0. +496 */ +497 @Deprecated +498 public static boolean isArrayByteBase64(final byte[] arrayOctet) { +499 return isBase64(arrayOctet); +500 } +501 +502 /** +503 * Returns whether or not the <code>octet</code> is in the base 64 alphabet. +504 * +505 * @param octet +506 * The value to test +507 * @return <code>true</code> if the value is defined in the the base 64 alphabet, <code>false</code> otherwise. 
+508 * @since 1.4 +509 */ +510 public static boolean isBase64(final byte octet) { +511 return octet == PAD_DEFAULT || (octet >= 0 && octet < DECODE_TABLE.length && DECODE_TABLE[octet] != -1); +512 } +513 +514 /** +515 * Tests a given String to see if it contains only valid characters within the Base64 alphabet. Currently the +516 * method treats whitespace as valid. +517 * +518 * @param base64 +519 * String to test +520 * @return <code>true</code> if all characters in the String are valid characters in the Base64 alphabet or if +521 * the String is empty; <code>false</code>, otherwise +522 * @since 1.5 +523 */ +524 public static boolean isBase64(final String base64) { +525 return isBase64(StringUtils.getBytesUtf8(base64)); +526 } +527 +528 /** +529 * Tests a given byte array to see if it contains only valid characters within the Base64 alphabet. Currently the +530 * method treats whitespace as valid. +531 * +532 * @param arrayOctet +533 * byte array to test +534 * @return <code>true</code> if all bytes are valid characters in the Base64 alphabet or if the byte array is empty; +535 * <code>false</code>, otherwise +536 * @since 1.5 +537 */ +538 public static boolean isBase64(final byte[] arrayOctet) { +539 for (int i = 0; i < arrayOctet.length; i++) { +540 if (!isBase64(arrayOctet[i]) && !isWhiteSpace(arrayOctet[i])) { +541 return false; +542 } +543 } +544 return true; +545 } +546 +547 /** +548 * Encodes binary data using the base64 algorithm but does not chunk the output. +549 * +550 * @param binaryData +551 * binary data to encode +552 * @return byte[] containing Base64 characters in their UTF-8 representation. +553 */ +554 public static byte[] encodeBase64(final byte[] binaryData) { +555 return encodeBase64(binaryData, false); +556 } +557 +558 /** +559 * Encodes binary data using the base64 algorithm but does not chunk the output. 
+560 * +561 * NOTE: We changed the behaviour of this method from multi-line chunking (commons-codec-1.4) to +562 * single-line non-chunking (commons-codec-1.5). +563 * +564 * @param binaryData +565 * binary data to encode +566 * @return String containing Base64 characters. +567 * @since 1.4 (NOTE: 1.4 chunked the output, whereas 1.5 does not). +568 */ +569 public static String encodeBase64String(final byte[] binaryData) { +570 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false)); +571 } +572 +573 /** +574 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The +575 * url-safe variation emits - and _ instead of + and / characters. +576 * <b>Note: no padding is added.</b> +577 * @param binaryData +578 * binary data to encode +579 * @return byte[] containing Base64 characters in their UTF-8 representation. +580 * @since 1.4 +581 */ +582 public static byte[] encodeBase64URLSafe(final byte[] binaryData) { +583 return encodeBase64(binaryData, false, true); +584 } +585 +586 /** +587 * Encodes binary data using a URL-safe variation of the base64 algorithm but does not chunk the output. The +588 * url-safe variation emits - and _ instead of + and / characters. 
+589 * <b>Note: no padding is added.</b> +590 * @param binaryData +591 * binary data to encode +592 * @return String containing Base64 characters +593 * @since 1.4 +594 */ +595 public static String encodeBase64URLSafeString(final byte[] binaryData) { +596 return StringUtils.newStringUsAscii(encodeBase64(binaryData, false, true)); +597 } +598 +599 /** +600 * Encodes binary data using the base64 algorithm and chunks the encoded output into 76 character blocks +601 * +602 * @param binaryData +603 * binary data to encode +604 * @return Base64 characters chunked in 76 character blocks +605 */ +606 public static byte[] encodeBase64Chunked(final byte[] binaryData) { +607 return encodeBase64(binaryData, true); +608 } +609 +610 /** +611 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. +612 * +613 * @param binaryData +614 * Array containing binary data to encode. +615 * @param isChunked +616 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks +617 * @return Base64-encoded data. +618 * @throws IllegalArgumentException +619 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} +620 */ +621 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked) { +622 return encodeBase64(binaryData, isChunked, false); +623 } +624 +625 /** +626 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. +627 * +628 * @param binaryData +629 * Array containing binary data to encode. +630 * @param isChunked +631 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks +632 * @param urlSafe +633 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. +634 * <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> +635 * @return Base64-encoded data. 
+636 * @throws IllegalArgumentException +637 * Thrown when the input array needs an output array bigger than {@link Integer#MAX_VALUE} +638 * @since 1.4 +639 */ +640 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, final boolean urlSafe) { +641 return encodeBase64(binaryData, isChunked, urlSafe, Integer.MAX_VALUE); +642 } +643 +644 /** +645 * Encodes binary data using the base64 algorithm, optionally chunking the output into 76 character blocks. +646 * +647 * @param binaryData +648 * Array containing binary data to encode. +649 * @param isChunked +650 * if <code>true</code> this encoder will chunk the base64 output into 76 character blocks +651 * @param urlSafe +652 * if <code>true</code> this encoder will emit - and _ instead of the usual + and / characters. +653 * <b>Note: no padding is added when encoding using the URL-safe alphabet.</b> +654 * @param maxResultSize +655 * The maximum result size to accept. +656 * @return Base64-encoded data. +657 * @throws IllegalArgumentException +658 * Thrown when the input array needs an output array bigger than maxResultSize +659 * @since 1.4 +660 */ +661 public static byte[] encodeBase64(final byte[] binaryData, final boolean isChunked, +662 final boolean urlSafe, final int maxResultSize) { +663 if (binaryData == null || binaryData.length == 0) { +664 return binaryData; +665 } +666 +667 // Create this so can use the super-class method +668 // Also ensures that the same roundings are performed by the ctor and the code +669 final Base64 b64 = isChunked ? new Base64(urlSafe) : new Base64(0, CHUNK_SEPARATOR, urlSafe); +670 final long len = b64.getEncodedLength(binaryData); +671 if (len > maxResultSize) { +672 throw new IllegalArgumentException("Input array too big, the output array would be bigger (" + +673 len + +674 ") than the specified maximum size of " + +675 maxResultSize); +676 } +677 +678 return b64.encode(binaryData); +679 } +680 +681 /** +682 * Decodes a Base64 String into octets. 
+683 * <p> +684 * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. +685 * </p> +686 * +687 * @param base64String +688 * String containing Base64 data +689 * @return Array containing decoded data. +690 * @since 1.4 +691 */ +692 public static byte[] decodeBase64(final String base64String) { +693 return new Base64().decode(base64String); +694 } +695 +696 /** +697 * Decodes Base64 data into octets. +698 * <p> +699 * <b>Note:</b> this method seamlessly handles data encoded in URL-safe or normal mode. +700 * </p> +701 * +702 * @param base64Data +703 * Byte array containing Base64 data +704 * @return Array containing decoded data. +705 */ +706 public static byte[] decodeBase64(final byte[] base64Data) { +707 return new Base64().decode(base64Data); +708 } +709 +710 // Implementation of the Encoder Interface +711 +712 // Implementation of integer encoding used for crypto +713 /** +714 * Decodes a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. +715 * +716 * @param pArray +717 * a byte array containing base64 character data +718 * @return A BigInteger +719 * @since 1.4 +720 */ +721 public static BigInteger decodeInteger(final byte[] pArray) { +722 return new BigInteger(1, decodeBase64(pArray)); +723 } +724 +725 /** +726 * Encodes to a byte64-encoded integer according to crypto standards such as W3C's XML-Signature. +727 * +728 * @param bigInt +729 * a BigInteger +730 * @return A byte array containing base64 character data +731 * @throws NullPointerException +732 * if null is passed in +733 * @since 1.4 +734 */ +735 public static byte[] encodeInteger(final BigInteger bigInt) { +736 if (bigInt == null) { +737 throw new NullPointerException("encodeInteger called with null parameter"); +738 } +739 return encodeBase64(toIntegerBytes(bigInt), false); +740 } +741 +742 /** +743 * Returns a byte-array representation of a <code>BigInteger</code> without sign bit. 
+744 * +745 * @param bigInt +746 * <code>BigInteger</code> to be converted +747 * @return a byte array representation of the BigInteger parameter +748 */ +749 static byte[] toIntegerBytes(final BigInteger bigInt) { +750 int bitlen = bigInt.bitLength(); +751 // round bitlen +752 bitlen = ((bitlen + 7) >> 3) << 3; +753 final byte[] bigBytes = bigInt.toByteArray(); +754 +755 if (((bigInt.bitLength() % 8) != 0) && (((bigInt.bitLength() / 8) + 1) == (bitlen / 8))) { +756 return bigBytes; +757 } +758 // set up params for copying everything but sign bit +759 int startSrc = 0; +760 int len = bigBytes.length; +761 +762 // if bigInt is exactly byte-aligned, just skip signbit in copy +763 if ((bigInt.bitLength() % 8) == 0) { +764 startSrc = 1; +765 len--; +766 } +767 final int startDst = bitlen / 8 - len; // to pad w/ nulls as per spec +768 final byte[] resizedBytes = new byte[bitlen / 8]; +769 System.arraycopy(bigBytes, startSrc, resizedBytes, startDst, len); +770 return resizedBytes; +771 } +772 +773 /** +774 * Returns whether or not the <code>octet</code> is in the Base64 alphabet. +775 * +776 * @param octet +777 * The value to test +778 * @return <code>true</code> if the value is defined in the the Base64 alphabet <code>false</code> otherwise. +779 */ +780 @Override +781 protected boolean isInAlphabet(final byte octet) { +782 return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; +783 } +784 +785} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.InputStream;

/**
 * Input stream wrapper that Base64-decodes (the default) or Base64-encodes the bytes read through it, so data of
 * unlimited size can be processed in a streaming fashion.
 * <p>
 * When encoding, lines are 76 characters long and terminated by CRLF unless a different line length and separator
 * are passed to the four-argument constructor.
 * </p>
 * <p>
 * The counterpart class Base64OutputStream ENCODES by default, whereas this class DECODES by default; either
 * default can be overridden through the {@code doEncode} constructor argument.
 * </p>
 * <p>
 * Implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite> of RFC 2045 <cite>Multipurpose Internet
 * Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed and Borenstein.
 * </p>
 * <p>
 * Because the class works on raw bytes rather than characters, it only handles character encodings that are
 * compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
 * </p>
 *
 * @version $Id$
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @since 1.4
 */
public class Base64InputStream extends BaseNCodecInputStream {

    /**
     * Wraps {@code in} so that everything read from this stream is the Base64-decoded form of the underlying data.
     *
     * @param in
     *            the stream to wrap
     */
    public Base64InputStream(final InputStream in) {
        // Same effect as delegating to (in, false): decode mode with a default Base64(false) codec.
        super(in, new Base64(false), false);
    }

    /**
     * Wraps {@code in} so that everything read from this stream is either the Base64-encoded or the Base64-decoded
     * form of the underlying data.
     *
     * @param in
     *            the stream to wrap
     * @param doEncode
     *            {@code true} to encode the data read from this stream, {@code false} to decode it
     */
    public Base64InputStream(final InputStream in, final boolean doEncode) {
        super(in, new Base64(false), doEncode);
    }

    /**
     * Wraps {@code in} so that everything read from this stream is either the Base64-encoded or the Base64-decoded
     * form of the underlying data, using a caller-supplied line length and line separator for encoding.
     *
     * @param in
     *            the stream to wrap
     * @param doEncode
     *            {@code true} to encode the data read from this stream, {@code false} to decode it
     * @param lineLength
     *            when encoding, the number of characters per encoded line (rounded down to the nearest multiple of
     *            4); if {@code lineLength <= 0} the encoded output is not split into lines. Ignored when decoding.
     * @param lineSeparator
     *            when encoding, the byte sequence that terminates each encoded line (e.g. \r\n); unused if
     *            {@code lineLength <= 0}. Ignored when decoding.
     */
    public Base64InputStream(final InputStream in,
                             final boolean doEncode,
                             final int lineLength,
                             final byte[] lineSeparator) {
        super(in, new Base64(lineLength, lineSeparator), doEncode);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.OutputStream;

/**
 * An {@link OutputStream} wrapper that Base64-encodes (or decodes) everything written to it, without any limit
 * on the total amount of data. Unless a different constructor is chosen, encoded output is broken into lines of
 * 76 characters terminated by CRLF.
 * <p>
 * A Base64OutputStream ENCODES by default, while a Base64InputStream DECODES by default. Either direction can be
 * selected explicitly through the {@code doEncode} constructor argument.
 * </p>
 * <p>
 * The implementation follows section <cite>6.8. Base64 Content-Transfer-Encoding</cite> of RFC 2045
 * <cite>Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies</cite> by Freed
 * and Borenstein.
 * </p>
 * <p>
 * Because the class works on raw bytes rather than characters, it can only encode/decode character encodings
 * that are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, UTF-8, etc).
 * </p>
 * <p>
 * <b>Note:</b> The stream must be closed once the last byte has been written. Otherwise the final padding is
 * never emitted and the resulting data is incomplete/inconsistent.
 * </p>
 *
 * @version $Id$
 * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
 * @since 1.4
 */
public class Base64OutputStream extends BaseNCodecOutputStream {

    /**
     * Wraps {@code out} so that every byte written is Base64-encoded before reaching it.
     *
     * @param out
     *            the stream that receives the encoded data.
     */
    public Base64OutputStream(final OutputStream out) {
        super(out, new Base64(false), true);
    }

    /**
     * Wraps {@code out} so that every byte written is Base64-encoded or Base64-decoded before reaching it,
     * depending on {@code doEncode}.
     *
     * @param out
     *            the stream that receives the converted data.
     * @param doEncode
     *            {@code true} to encode the data written to this stream, {@code false} to decode it.
     */
    public Base64OutputStream(final OutputStream out, final boolean doEncode) {
        super(out, new Base64(false), doEncode);
    }

    /**
     * Wraps {@code out} so that every byte written is Base64-encoded or Base64-decoded before reaching it,
     * using caller-supplied line chunking when encoding.
     *
     * @param out
     *            the stream that receives the converted data.
     * @param doEncode
     *            {@code true} to encode the data written to this stream, {@code false} to decode it.
     * @param lineLength
     *            when encoding, the number of characters per encoded line (rounded down to the nearest multiple
     *            of 4); a value {@code <= 0} disables line splitting. Ignored when decoding.
     * @param lineSeparator
     *            when encoding, the byte sequence that terminates each encoded line (e.g. \r\n); unused if
     *            {@code lineLength <= 0}. Ignored when decoding.
     */
    public Base64OutputStream(final OutputStream out, final boolean doEncode,
                              final int lineLength, final byte[] lineSeparator) {
        super(out, new Base64(lineLength, lineSeparator), doEncode);
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.util.Arrays;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Abstract superclass for Base-N encoders and decoders.
 *
 * <p>
 * This class is thread-safe: all per-operation state lives in a {@link Context} that is passed to each call
 * rather than being stored on the codec instance.
 * </p>
 *
 * @version $Id$
 */
public abstract class BaseNCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * Holds thread context so classes can be thread-safe.
     *
     * This class is not itself thread-safe; each thread must allocate its own copy.
     *
     * @since 1.7
     */
    static class Context {

        /**
         * Place holder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        int ibitWorkArea;

        /**
         * Place holder for the bytes we're dealing with for our based logic.
         * Bitwise operations store and extract the encoding or decoding from this variable.
         */
        long lbitWorkArea;

        /**
         * Buffer for streaming. {@code null} means there is no buffered output to read.
         */
        byte[] buffer;

        /**
         * Position where next character should be written in the buffer.
         */
        int pos;

        /**
         * Position where next character should be read from the buffer.
         */
        int readPos;

        /**
         * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless,
         * and must be thrown away.
         */
        boolean eof;

        /**
         * Variable tracks how many characters have been written to the current line. Only used when encoding. We use
         * it to make sure each encoded line never goes beyond lineLength (if lineLength &gt; 0).
         */
        int currentLinePos;

        /**
         * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This
         * variable helps track that.
         */
        int modulus;

        Context() {
        }

        /**
         * Returns a String useful for debugging (especially within a debugger.)
         *
         * @return a String useful for debugging.
         */
        @SuppressWarnings("boxing") // OK to ignore boxing here
        @Override
        public String toString() {
            return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " +
                    "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer),
                    currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos);
        }
    }

    /**
     * EOF
     *
     * @since 1.7
     */
    static final int EOF = -1;

    /**
     *  MIME chunk size per RFC 2045 section 6.8.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
     */
    public static final int MIME_CHUNK_SIZE = 76;

    /**
     * PEM chunk size per RFC 1421 section 4.3.2.4.
     *
     * <p>
     * The {@value} character limit does not count the trailing CRLF, but counts all other characters, including any
     * equal signs.
     * </p>
     *
     * @see <a href="http://tools.ietf.org/html/rfc1421">RFC 1421 section 4.3.2.4</a>
     */
    public static final int PEM_CHUNK_SIZE = 64;

    /** Factor by which an existing buffer is grown when it runs out of space. */
    private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2;

    /**
     * Defines the default buffer size - currently {@value}
     * - must be large enough for at least one encoded block+separator
     */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Mask used to extract 8 bits, used in decoding bytes */
    protected static final int MASK_8BITS = 0xff;

    /**
     * Byte used to pad output.
     */
    protected static final byte PAD_DEFAULT = '='; // Allow static access to default

    /**
     * @deprecated Use {@link #pad}. Will be removed in 2.0.
     */
    @Deprecated
    protected final byte PAD = PAD_DEFAULT; // instance variable just in case it needs to vary later

    /** Padding byte used by this codec instance. */
    protected final byte pad; // instance variable just in case it needs to vary later

    /** Number of bytes in each full block of unencoded data, e.g. 3 for Base64 and 5 for Base32 */
    private final int unencodedBlockSize;

    /** Number of bytes in each full block of encoded data, e.g. 4 for Base64 and 8 for Base32 */
    private final int encodedBlockSize;

    /**
     * Chunksize for encoding. Not used when decoding.
     * A value of zero or less implies no chunking of the encoded data.
     * Rounded down to nearest multiple of encodedBlockSize.
     */
    protected final int lineLength;

    /**
     * Size of chunk separator. Not used unless {@link #lineLength} &gt; 0.
     */
    private final int chunkSeparatorLength;

    /**
     * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
     * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
     * @param chunkSeparatorLength the chunk separator length, if relevant
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength) {
        this(unencodedBlockSize, encodedBlockSize, lineLength, chunkSeparatorLength, PAD_DEFAULT);
    }

    /**
     * Note <code>lineLength</code> is rounded down to the nearest multiple of {@link #encodedBlockSize}
     * If <code>chunkSeparatorLength</code> is zero, then chunking is disabled.
     * @param unencodedBlockSize the size of an unencoded block (e.g. Base64 = 3)
     * @param encodedBlockSize the size of an encoded block (e.g. Base64 = 4)
     * @param lineLength if &gt; 0, use chunking with a length <code>lineLength</code>
     * @param chunkSeparatorLength the chunk separator length, if relevant
     * @param pad byte used as padding byte.
     */
    protected BaseNCodec(final int unencodedBlockSize, final int encodedBlockSize,
                         final int lineLength, final int chunkSeparatorLength, final byte pad) {
        this.unencodedBlockSize = unencodedBlockSize;
        this.encodedBlockSize = encodedBlockSize;
        // Chunking needs both a positive line length and a non-empty separator.
        final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0;
        this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0;
        this.chunkSeparatorLength = chunkSeparatorLength;

        this.pad = pad;
    }

    /**
     * Returns true if this object has buffered data for reading.
     *
     * @param context the context to be used
     * @return true if there is data still available for reading.
     */
    boolean hasData(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null;
    }

    /**
     * Returns the amount of buffered data available for reading.
     *
     * @param context the context to be used
     * @return The amount of buffered data available for reading.
     */
    int available(final Context context) {  // package protected for access from I/O streams
        return context.buffer != null ? context.pos - context.readPos : 0;
    }

    /**
     * Get the default buffer size. Can be overridden.
     *
     * @return {@link #DEFAULT_BUFFER_SIZE}
     */
    protected int getDefaultBufferSize() {
        return DEFAULT_BUFFER_SIZE;
    }

    /**
     * Allocates the context buffer if there is none, otherwise grows it by
     * {@link #DEFAULT_BUFFER_RESIZE_FACTOR}, or to {@code minCapacity} if that is larger.
     *
     * @param context the context to be used
     * @param minCapacity the minimum length the resulting buffer must have
     * @return the (new) buffer, which is also stored in {@code context.buffer}
     */
    private byte[] resizeBuffer(final Context context, final int minCapacity) {
        if (context.buffer == null) {
            // A request larger than the default size must still be honoured in full.
            context.buffer = new byte[Math.max(getDefaultBufferSize(), minCapacity)];
            context.pos = 0;
            context.readPos = 0;
        } else {
            // If doubling overflows int the product goes negative, so Math.max falls back to minCapacity.
            final int doubled = context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR;
            context.buffer = Arrays.copyOf(context.buffer, Math.max(doubled, minCapacity));
        }
        return context.buffer;
    }

    /**
     * Ensure that the buffer has room for <code>size</code> bytes
     * <p>
     * The returned buffer is guaranteed to have at least <code>size</code> bytes free after the current write
     * position, even when that requires more than one doubling of the existing buffer.
     * </p>
     *
     * @param size minimum spare space required
     * @param context the context to be used
     * @return the buffer
     */
    protected byte[] ensureBufferSize(final int size, final Context context){
        if (context.buffer == null) {
            // The write position is reset to 0 on allocation, so only size bytes are required.
            return resizeBuffer(context, size);
        }
        if (context.buffer.length < context.pos + size) {
            return resizeBuffer(context, context.pos + size);
        }
        return context.buffer;
    }

    /**
     * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail
     * bytes. Returns how many bytes were actually extracted.
     * <p>
     * Package protected for access from I/O streams.
     *
     * @param b
     *            byte[] array to extract the buffered data into.
     * @param bPos
     *            position in byte[] array to start extraction at.
     * @param bAvail
     *            amount of bytes we're allowed to extract. We may extract fewer (if fewer are available).
     * @param context
     *            the context to be used
     * @return The number of bytes successfully extracted into the provided byte[] array; when nothing is buffered,
     *         {@link #EOF} if the context has reached EOF and 0 otherwise.
     */
    int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) {
        if (context.buffer != null) {
            final int len = Math.min(available(context), bAvail);
            System.arraycopy(context.buffer, context.readPos, b, bPos, len);
            context.readPos += len;
            if (context.readPos >= context.pos) {
                context.buffer = null; // so hasData() will return false, and this method can return -1
            }
            return len;
        }
        return context.eof ? EOF : 0;
    }

    /**
     * Checks if a byte value is whitespace or not.
     * Whitespace is taken to mean: space, tab, CR, LF
     * @param byteToCheck
     *            the byte to check
     * @return true if byte is whitespace, false otherwise
     */
    protected static boolean isWhiteSpace(final byte byteToCheck) {
        switch (byteToCheck) {
            case ' ' :
            case '\n' :
            case '\r' :
            case '\t' :
                return true;
            default :
                return false;
        }
    }

    /**
     * Encodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Encoder interface, and will throw an EncoderException if the supplied object is not of type byte[].
     *
     * @param obj
     *            Object to encode
     * @return An object (of type byte[]) containing the Base-N encoded data which corresponds to the byte[] supplied.
     * @throws EncoderException
     *             if the parameter supplied is not of type byte[]
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (!(obj instanceof byte[])) {
            throw new EncoderException("Parameter supplied to Base-N encode is not a byte[]");
        }
        return encode((byte[]) obj);
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the Base-N alphabet.
     * Uses UTF8 encoding.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A String containing only Base-N character data
     */
    public String encodeToString(final byte[] pArray) {
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Encodes a byte[] containing binary data, into a String containing characters in the appropriate alphabet.
     * Uses UTF8 encoding.
     * <p>
     * This is a duplicate of {@link #encodeToString(byte[])}; it was merged during refactoring.
     * </p>
     *
     * @param pArray a byte array containing binary data
     * @return String containing only character data in the appropriate alphabet.
     * @since 1.5
     */
    public String encodeAsString(final byte[] pArray){
        return StringUtils.newStringUtf8(encode(pArray));
    }

    /**
     * Decodes an Object using the Base-N algorithm. This method is provided in order to satisfy the requirements of
     * the Decoder interface, and will throw a DecoderException if the supplied object is not of type byte[] or String.
     *
     * @param obj
     *            Object to decode
     * @return An object (of type byte[]) containing the binary data which corresponds to the byte[] or String
     *         supplied.
     * @throws DecoderException
     *             if the parameter supplied is neither a byte[] nor a String
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Parameter supplied to Base-N decode is not a byte[] or a String");
        }
    }

    /**
     * Decodes a String containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A String containing Base-N character data
     * @return a byte array containing binary data
     */
    public byte[] decode(final String pArray) {
        return decode(StringUtils.getBytesUtf8(pArray));
    }

    /**
     * Decodes a byte[] containing characters in the Base-N alphabet.
     *
     * @param pArray
     *            A byte array containing Base-N character data
     * @return a byte array containing binary data; a null or empty input is returned unchanged
     */
    @Override
    public byte[] decode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        decode(pArray, 0, pArray.length, context);
        decode(pArray, 0, EOF, context); // Notify decoder of EOF.
        final byte[] result = new byte[context.pos];
        readResults(result, 0, result.length, context);
        return result;
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @return A byte array containing only the base N alphabetic character data; a null or empty input is
     *         returned unchanged
     */
    @Override
    public byte[] encode(final byte[] pArray) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        return encode(pArray, 0, pArray.length);
    }

    /**
     * Encodes a byte[] containing binary data, into a byte[] containing
     * characters in the alphabet.
     *
     * @param pArray
     *            a byte array containing binary data
     * @param offset
     *            initial offset of the subarray.
     * @param length
     *            length of the subarray.
     * @return A byte array containing only the base N alphabetic character data; a null or empty input array is
     *         returned unchanged
     * @since 1.11
     */
    public byte[] encode(final byte[] pArray, final int offset, final int length) {
        if (pArray == null || pArray.length == 0) {
            return pArray;
        }
        final Context context = new Context();
        encode(pArray, offset, length, context);
        encode(pArray, offset, EOF, context); // Notify encoder of EOF.
        final byte[] buf = new byte[context.pos - context.readPos];
        readResults(buf, 0, buf.length, context);
        return buf;
    }

    // package protected for access from I/O streams
    abstract void encode(byte[] pArray, int i, int length, Context context);

    // package protected for access from I/O streams
    abstract void decode(byte[] pArray, int i, int length, Context context);

    /**
     * Returns whether or not the <code>octet</code> is in the current alphabet.
     * Does not allow whitespace or pad.
     *
     * @param value The value to test
     *
     * @return <code>true</code> if the value is defined in the current alphabet, <code>false</code> otherwise.
     */
    protected abstract boolean isInAlphabet(byte value);

    /**
     * Tests a given byte array to see if it contains only valid characters within the alphabet.
     * The method optionally treats whitespace and pad as valid.
     *
     * @param arrayOctet byte array to test
     * @param allowWSPad if <code>true</code>, then whitespace and PAD are also allowed
     *
     * @return <code>true</code> if all bytes are valid characters in the alphabet or if the byte array is empty;
     *         <code>false</code>, otherwise
     */
    public boolean isInAlphabet(final byte[] arrayOctet, final boolean allowWSPad) {
        for (final byte octet : arrayOctet) {
            if (!isInAlphabet(octet) &&
                    (!allowWSPad || (octet != pad) && !isWhiteSpace(octet))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Tests a given String to see if it contains only valid characters within the alphabet.
     * The method treats whitespace and PAD as valid.
     *
     * @param basen String to test
     * @return <code>true</code> if all characters in the String are valid characters in the alphabet or if
     *         the String is empty; <code>false</code>, otherwise
     * @see #isInAlphabet(byte[], boolean)
     */
    public boolean isInAlphabet(final String basen) {
        return isInAlphabet(StringUtils.getBytesUtf8(basen), true);
    }

    /**
     * Tests a given byte array to see if it contains any characters within the alphabet or PAD.
     *
     * Intended for use in checking line-ending arrays
     *
     * @param arrayOctet
     *            byte array to test
     * @return <code>true</code> if any byte is a valid character in the alphabet or PAD; <code>false</code> otherwise
     *         (including when the array is <code>null</code>)
     */
    protected boolean containsAlphabetOrPad(final byte[] arrayOctet) {
        if (arrayOctet == null) {
            return false;
        }
        for (final byte element : arrayOctet) {
            if (pad == element || isInAlphabet(element)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Calculates the amount of space needed to encode the supplied array.
     *
     * @param pArray byte[] array which will later be encoded
     *
     * @return amount of space needed to encoded the supplied array.
     * Returns a long since a max-len array will require &gt; Integer.MAX_VALUE
     */
    public long getEncodedLength(final byte[] pArray) {
        // Calculate non-chunked size - rounded up to allow for padding
        // cast to long is needed to avoid possibility of overflow
        long len = ((pArray.length + unencodedBlockSize-1)  / unencodedBlockSize) * (long) encodedBlockSize;
        if (lineLength > 0) { // We're using chunking
            // Round up to nearest multiple
            len += ((len + lineLength-1) / lineLength) * chunkSeparatorLength;
        }
        return len;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import static org.apache.commons.codec.binary.BaseNCodec.EOF;

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.codec.binary.BaseNCodec.Context;

/**
 * Common superclass for Base-N input streams. Data read from the wrapped stream is passed through a
 * {@link BaseNCodec} (encoding or decoding, as chosen at construction) before being handed to the caller.
 *
 * @since 1.5
 * @version $Id$
 */
public class BaseNCodecInputStream extends FilterInputStream {

    private final BaseNCodec baseNCodec;

    private final boolean doEncode;

    /** Scratch array backing the single-byte {@link #read()}. */
    private final byte[] singleByte = new byte[1];

    /** Codec state for this stream. */
    private final Context context = new Context();

    /**
     * Creates a stream that encodes or decodes the data read from {@code in}.
     *
     * @param in the stream to read raw data from
     * @param baseNCodec the codec performing the conversion
     * @param doEncode {@code true} to encode the data read, {@code false} to decode it
     */
    protected BaseNCodecInputStream(final InputStream in, final BaseNCodec baseNCodec, final boolean doEncode) {
        super(in);
        this.doEncode = doEncode;
        this.baseNCodec = baseNCodec;
    }

    /**
     * {@inheritDoc}
     *
     * @return <code>0</code> if the {@link InputStream} has reached <code>EOF</code>,
     * <code>1</code> otherwise
     * @since 1.7
     */
    @Override
    public int available() throws IOException {
        // Note: the logic is similar to the InflaterInputStream:
        //       as long as we have not reached EOF, indicate that there is more
        //       data available. As we do not know for sure how much data is left,
        //       just return 1 as a safe guess.

        return context.eof ? 0 : 1;
    }

    /**
     * Marks the current position in this input stream.
     * <p>The {@link #mark} method of {@link BaseNCodecInputStream} does nothing.</p>
     *
     * @param readLimit the maximum limit of bytes that can be read before the mark position becomes invalid.
     * @since 1.7
     */
    @Override
    public synchronized void mark(final int readLimit) {
    }

    /**
     * {@inheritDoc}
     *
     * @return always returns <code>false</code>
     */
    @Override
    public boolean markSupported() {
        return false; // not an easy job to support marks
    }

    /**
     * Reads one <code>byte</code> from this input stream.
     *
     * @return the byte as an integer in the range 0 to 255. Returns -1 if EOF has been reached.
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public int read() throws IOException {
        int r = read(singleByte, 0, 1);
        while (r == 0) {
            r = read(singleByte, 0, 1);
        }
        if (r > 0) {
            final byte b = singleByte[0];
            // Map the signed byte onto the unsigned 0..255 range required by InputStream.read().
            return b < 0 ? 256 + b : b;
        }
        return EOF;
    }

    /**
     * Attempts to read <code>len</code> bytes into the specified <code>b</code> array starting at <code>offset</code>
     * from this InputStream.
     *
     * @param b
     *            destination byte array
     * @param offset
     *            where to start writing the bytes
     * @param len
     *            maximum number of bytes to read
     *
     * @return number of bytes read, or -1 once EOF has been reached; 0 only if <code>len</code> is 0
     * @throws IOException
     *             if an I/O error occurs.
     * @throws NullPointerException
     *             if the byte array parameter is null
     * @throws IndexOutOfBoundsException
     *             if offset, len or buffer size are invalid
     */
    @Override
    public int read(final byte b[], final int offset, final int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        } else if (offset < 0 || len < 0) {
            throw new IndexOutOfBoundsException();
        } else if (offset > b.length || len > b.length - offset) {
            // Written as a subtraction so that a huge len cannot overflow offset + len and slip through.
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        } else {
            int readLen = 0;
            /*
             Rationale for while-loop on (readLen == 0):
             -----
             BaseNCodec.readResults() usually returns > 0 or EOF (-1).  In the
             rare case where it returns 0, we just keep trying.

             This is essentially an undocumented contract for InputStream
             implementors that want their code to work properly with
             java.io.InputStreamReader, since the latter hates it when
             InputStream.read(byte[]) returns a zero.  Unfortunately our
             readResults() call must return 0 if a large amount of the data
             being decoded was not part of the alphabet, so this while-loop
             enables proper interop with InputStreamReader for that scenario.
             -----
             This is a fix for CODEC-101
            */
            while (readLen == 0) {
                if (!baseNCodec.hasData(context)) {
                    final byte[] buf = new byte[doEncode ? 4096 : 8192];
                    // c is -1 at end of input, which the codec interprets as the EOF notification.
                    final int c = in.read(buf);
                    if (doEncode) {
                        baseNCodec.encode(buf, 0, c, context);
                    } else {
                        baseNCodec.decode(buf, 0, c, context);
                    }
                }
                readLen = baseNCodec.readResults(b, offset, len, context);
            }
            return readLen;
        }
    }

    /**
     * Repositions this stream to the position at the time the mark method was last called on this input stream.
     * <p>
     * The {@link #reset} method of {@link BaseNCodecInputStream} does nothing except throw an {@link IOException}.
     *
     * @throws IOException if this method is invoked
     * @since 1.7
     */
    @Override
    public synchronized void reset() throws IOException {
        throw new IOException("mark/reset not supported");
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalArgumentException if the provided skip length is negative
     * @since 1.7
     */
    @Override
    public long skip(final long n) throws IOException {
        if (n < 0) {
            throw new IllegalArgumentException("Negative skip length: " + n);
        }

        // skip in chunks of 512 bytes
        final byte[] b = new byte[512];
        long todo = n;

        while (todo > 0) {
            int len = (int) Math.min(b.length, todo);
            len = this.read(b, 0, len);
            if (len == EOF) {
                break;
            }
            todo -= len;
        }

        return n - todo;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import static org.apache.commons.codec.binary.BaseNCodec.EOF;

import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;

import org.apache.commons.codec.binary.BaseNCodec.Context;

/**
 * Common superclass for Base-N output streams. Data written to this stream is passed through a
 * {@link BaseNCodec} (encoding or decoding, as chosen at construction) before reaching the wrapped stream.
 * <p>
 * To write the EOF marker without closing the stream, call {@link #eof()} or use an <a
 * href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
 * "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
 * >CloseShieldOutputStream</a>.
 * </p>
 *
 * @since 1.5
 * @version $Id$
 */
public class BaseNCodecOutputStream extends FilterOutputStream {

    private final boolean doEncode;

    private final BaseNCodec baseNCodec;

    /** Scratch array backing the single-byte {@link #write(int)}. */
    private final byte[] singleByte = new byte[1];

    /** Codec state for this stream. */
    private final Context context = new Context();

    /**
     * Creates a stream that encodes or decodes everything written to it before passing it on to {@code out}.
     *
     * @param out the stream receiving the converted data
     * @param basedCodec the codec performing the conversion
     * @param doEncode {@code true} to encode the data written, {@code false} to decode it
     */
    // TODO should this be protected?
    public BaseNCodecOutputStream(final OutputStream out, final BaseNCodec basedCodec, final boolean doEncode) {
        super(out);
        this.baseNCodec = basedCodec;
        this.doEncode = doEncode;
    }

    /**
     * Writes the specified <code>byte</code> to this output stream.
     *
     * @param i
     *            source byte
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void write(final int i) throws IOException {
        singleByte[0] = (byte) i;
        write(singleByte, 0, 1);
    }

    /**
     * Writes <code>len</code> bytes from the specified <code>b</code> array starting at <code>offset</code> to this
     * output stream.
     *
     * @param b
     *            source byte array
     * @param offset
     *            where to start reading the bytes
     * @param len
     *            maximum number of bytes to write
     *
     * @throws IOException
     *             if an I/O error occurs.
     * @throws NullPointerException
     *             if the byte array parameter is null
     * @throws IndexOutOfBoundsException
     *             if offset, len or buffer size are invalid
     */
    @Override
    public void write(final byte b[], final int offset, final int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        } else if (offset < 0 || len < 0) {
            throw new IndexOutOfBoundsException();
        } else if (offset > b.length || len > b.length - offset) {
            // Written as a subtraction so that a huge len cannot overflow offset + len and slip through.
            throw new IndexOutOfBoundsException();
        } else if (len > 0) {
            if (doEncode) {
                baseNCodec.encode(b, offset, len, context);
            } else {
                baseNCodec.decode(b, offset, len, context);
            }
            // Push whatever the codec produced to the wrapped stream, without flushing that stream itself.
            flush(false);
        }
    }

    /**
     * Flushes this output stream and forces any buffered output bytes to be written out to the stream. If propagate is
     * true, the wrapped stream will also be flushed.
     *
     * @param propagate
     *            boolean flag to indicate whether the wrapped OutputStream should also be flushed.
     * @throws IOException
     *             if an I/O error occurs.
     */
    private void flush(final boolean propagate) throws IOException {
        final int avail = baseNCodec.available(context);
        if (avail > 0) {
            final byte[] buf = new byte[avail];
            final int c = baseNCodec.readResults(buf, 0, avail, context);
            if (c > 0) {
                out.write(buf, 0, c);
            }
        }
        if (propagate) {
            out.flush();
        }
    }

    /**
     * Flushes this output stream and forces any buffered output bytes to be written out to the stream.
     *
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flush(true);
    }

    /**
     * Closes this output stream and releases any system resources associated with the stream.
     * <p>
     * The EOF marker is written and the remaining output flushed first. The wrapped stream is closed even if
     * either of those steps fails; if closing fails as well, the exception from closing is the one propagated.
     * </p>
     * <p>
     * To write the EOF marker without closing the stream, call {@link #eof()} or use an
     * <a href="https://commons.apache.org/proper/commons-io/">Apache Commons IO</a> <a href=
     * "https://commons.apache.org/proper/commons-io/apidocs/org/apache/commons/io/output/CloseShieldOutputStream.html"
     * >CloseShieldOutputStream</a>.
     * </p>
     *
     * @throws IOException
     *             if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        try {
            eof();
            flush();
        } finally {
            // Always release the wrapped stream, even when finishing the encoded output failed.
            out.close();
        }
    }

    /**
     * Writes EOF.
     *
     * @throws IOException
     *             if an I/O error occurs.
     * @since 1.11
     */
    public void eof() throws IOException {
        // Notify encoder of EOF (-1).
        if (doEncode) {
            baseNCodec.encode(singleByte, 0, EOF, context);
        } else {
            baseNCodec.decode(singleByte, 0, EOF, context);
        }
    }

}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.binary; +019 +020import org.apache.commons.codec.BinaryDecoder; +021import org.apache.commons.codec.BinaryEncoder; +022import org.apache.commons.codec.DecoderException; +023import org.apache.commons.codec.EncoderException; +024 +025/** +026 * Converts between byte arrays and strings of "0"s and "1"s. +027 * +028 * <p>This class is immutable and thread-safe.</p> +029 * +030 * TODO: may want to add more bit vector functions like and/or/xor/nand +031 * TODO: also might be good to generate boolean[] from byte[] et cetera. +032 * +033 * @since 1.3 +034 * @version $Id$ +035 */ +036public class BinaryCodec implements BinaryDecoder, BinaryEncoder { +037 /* +038 * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth +039 * it. +040 */ +041 /** Empty char array. */ +042 private static final char[] EMPTY_CHAR_ARRAY = new char[0]; +043 +044 /** Empty byte array. */ +045 private static final byte[] EMPTY_BYTE_ARRAY = new byte[0]; +046 +047 /** Mask for bit 0 of a byte. 
*/ +048 private static final int BIT_0 = 1; +049 +050 /** Mask for bit 1 of a byte. */ +051 private static final int BIT_1 = 0x02; +052 +053 /** Mask for bit 2 of a byte. */ +054 private static final int BIT_2 = 0x04; +055 +056 /** Mask for bit 3 of a byte. */ +057 private static final int BIT_3 = 0x08; +058 +059 /** Mask for bit 4 of a byte. */ +060 private static final int BIT_4 = 0x10; +061 +062 /** Mask for bit 5 of a byte. */ +063 private static final int BIT_5 = 0x20; +064 +065 /** Mask for bit 6 of a byte. */ +066 private static final int BIT_6 = 0x40; +067 +068 /** Mask for bit 7 of a byte. */ +069 private static final int BIT_7 = 0x80; +070 +071 private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7}; +072 +073 /** +074 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters. +075 * +076 * @param raw +077 * the raw binary data to convert +078 * @return 0 and 1 ASCII character bytes one for each bit of the argument +079 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) +080 */ +081 @Override +082 public byte[] encode(final byte[] raw) { +083 return toAsciiBytes(raw); +084 } +085 +086 /** +087 * Converts an array of raw binary data into an array of ASCII 0 and 1 chars. +088 * +089 * @param raw +090 * the raw binary data to convert +091 * @return 0 and 1 ASCII character chars one for each bit of the argument +092 * @throws EncoderException +093 * if the argument is not a byte[] +094 * @see org.apache.commons.codec.Encoder#encode(Object) +095 */ +096 @Override +097 public Object encode(final Object raw) throws EncoderException { +098 if (!(raw instanceof byte[])) { +099 throw new EncoderException("argument not a byte array"); +100 } +101 return toAsciiChars((byte[]) raw); +102 } +103 +104 /** +105 * Decodes a byte array where each byte represents an ASCII '0' or '1'. 
+106 * +107 * @param ascii +108 * each byte represents an ASCII '0' or '1' +109 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument +110 * @throws DecoderException +111 * if argument is not a byte[], char[] or String +112 * @see org.apache.commons.codec.Decoder#decode(Object) +113 */ +114 @Override +115 public Object decode(final Object ascii) throws DecoderException { +116 if (ascii == null) { +117 return EMPTY_BYTE_ARRAY; +118 } +119 if (ascii instanceof byte[]) { +120 return fromAscii((byte[]) ascii); +121 } +122 if (ascii instanceof char[]) { +123 return fromAscii((char[]) ascii); +124 } +125 if (ascii instanceof String) { +126 return fromAscii(((String) ascii).toCharArray()); +127 } +128 throw new DecoderException("argument not a byte array"); +129 } +130 +131 /** +132 * Decodes a byte array where each byte represents an ASCII '0' or '1'. +133 * +134 * @param ascii +135 * each byte represents an ASCII '0' or '1' +136 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument +137 * @see org.apache.commons.codec.Decoder#decode(Object) +138 */ +139 @Override +140 public byte[] decode(final byte[] ascii) { +141 return fromAscii(ascii); +142 } +143 +144 /** +145 * Decodes a String where each char of the String represents an ASCII '0' or '1'. 
+146 * +147 * @param ascii +148 * String of '0' and '1' characters +149 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument +150 * @see org.apache.commons.codec.Decoder#decode(Object) +151 */ +152 public byte[] toByteArray(final String ascii) { +153 if (ascii == null) { +154 return EMPTY_BYTE_ARRAY; +155 } +156 return fromAscii(ascii.toCharArray()); +157 } +158 +159 // ------------------------------------------------------------------------ +160 // +161 // static codec operations +162 // +163 // ------------------------------------------------------------------------ +164 /** +165 * Decodes a char array where each char represents an ASCII '0' or '1'. +166 * +167 * @param ascii +168 * each char represents an ASCII '0' or '1' +169 * @return the raw encoded binary where each bit corresponds to a char in the char array argument +170 */ +171 public static byte[] fromAscii(final char[] ascii) { +172 if (ascii == null || ascii.length == 0) { +173 return EMPTY_BYTE_ARRAY; +174 } +175 // get length/8 times bytes with 3 bit shifts to the right of the length +176 final byte[] l_raw = new byte[ascii.length >> 3]; +177 /* +178 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the +179 * loop. +180 */ +181 for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { +182 for (int bits = 0; bits < BITS.length; ++bits) { +183 if (ascii[jj - bits] == '1') { +184 l_raw[ii] |= BITS[bits]; +185 } +186 } +187 } +188 return l_raw; +189 } +190 +191 /** +192 * Decodes a byte array where each byte represents an ASCII '0' or '1'. 
+193 * +194 * @param ascii +195 * each byte represents an ASCII '0' or '1' +196 * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument +197 */ +198 public static byte[] fromAscii(final byte[] ascii) { +199 if (isEmpty(ascii)) { +200 return EMPTY_BYTE_ARRAY; +201 } +202 // get length/8 times bytes with 3 bit shifts to the right of the length +203 final byte[] l_raw = new byte[ascii.length >> 3]; +204 /* +205 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the +206 * loop. +207 */ +208 for (int ii = 0, jj = ascii.length - 1; ii < l_raw.length; ii++, jj -= 8) { +209 for (int bits = 0; bits < BITS.length; ++bits) { +210 if (ascii[jj - bits] == '1') { +211 l_raw[ii] |= BITS[bits]; +212 } +213 } +214 } +215 return l_raw; +216 } +217 +218 /** +219 * Returns <code>true</code> if the given array is <code>null</code> or empty (size 0.) +220 * +221 * @param array +222 * the source array +223 * @return <code>true</code> if the given array is <code>null</code> or empty (size 0.) +224 */ +225 private static boolean isEmpty(final byte[] array) { +226 return array == null || array.length == 0; +227 } +228 +229 /** +230 * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated +231 * char. +232 * +233 * @param raw +234 * the raw binary data to convert +235 * @return an array of 0 and 1 character bytes for each bit of the argument +236 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) +237 */ +238 public static byte[] toAsciiBytes(final byte[] raw) { +239 if (isEmpty(raw)) { +240 return EMPTY_BYTE_ARRAY; +241 } +242 // get 8 times the bytes with 3 bit shifts to the left of the length +243 final byte[] l_ascii = new byte[raw.length << 3]; +244 /* +245 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the +246 * loop. 
+247 */ +248 for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { +249 for (int bits = 0; bits < BITS.length; ++bits) { +250 if ((raw[ii] & BITS[bits]) == 0) { +251 l_ascii[jj - bits] = '0'; +252 } else { +253 l_ascii[jj - bits] = '1'; +254 } +255 } +256 } +257 return l_ascii; +258 } +259 +260 /** +261 * Converts an array of raw binary data into an array of ASCII 0 and 1 characters. +262 * +263 * @param raw +264 * the raw binary data to convert +265 * @return an array of 0 and 1 characters for each bit of the argument +266 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) +267 */ +268 public static char[] toAsciiChars(final byte[] raw) { +269 if (isEmpty(raw)) { +270 return EMPTY_CHAR_ARRAY; +271 } +272 // get 8 times the bytes with 3 bit shifts to the left of the length +273 final char[] l_ascii = new char[raw.length << 3]; +274 /* +275 * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the +276 * loop. +277 */ +278 for (int ii = 0, jj = l_ascii.length - 1; ii < raw.length; ii++, jj -= 8) { +279 for (int bits = 0; bits < BITS.length; ++bits) { +280 if ((raw[ii] & BITS[bits]) == 0) { +281 l_ascii[jj - bits] = '0'; +282 } else { +283 l_ascii[jj - bits] = '1'; +284 } +285 } +286 } +287 return l_ascii; +288 } +289 +290 /** +291 * Converts an array of raw binary data into a String of ASCII 0 and 1 characters. +292 * +293 * @param raw +294 * the raw binary data to convert +295 * @return a String of 0 and 1 characters representing the binary data +296 * @see org.apache.commons.codec.BinaryEncoder#encode(byte[]) +297 */ +298 public static String toAsciiString(final byte[] raw) { +299 return new String(toAsciiChars(raw)); +300 } +301} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.binary; +018 +019/** +020 * <p> +021 * Operations on {@link CharSequence} that are <code>null</code> safe. +022 * </p> +023 * <p> +024 * Copied from Apache Commons Lang r1586295 on April 10, 2014 (day of 3.3.2 release). +025 * </p> +026 * +027 * @see CharSequence +028 * @since 1.10 +029 */ +030public class CharSequenceUtils { +031 +032 /** +033 * Green implementation of regionMatches. 
+034 * +035 * @param cs +036 * the <code>CharSequence</code> to be processed +037 * @param ignoreCase +038 * whether or not to be case insensitive +039 * @param thisStart +040 * the index to start on the <code>cs</code> CharSequence +041 * @param substring +042 * the <code>CharSequence</code> to be looked for +043 * @param start +044 * the index to start on the <code>substring</code> CharSequence +045 * @param length +046 * character length of the region +047 * @return whether the region matched +048 */ +049 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, +050 final CharSequence substring, final int start, final int length) { +051 if (cs instanceof String && substring instanceof String) { +052 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); +053 } +054 int index1 = thisStart; +055 int index2 = start; +056 int tmpLen = length; +057 +058 while (tmpLen-- > 0) { +059 final char c1 = cs.charAt(index1++); +060 final char c2 = substring.charAt(index2++); +061 +062 if (c1 == c2) { +063 continue; +064 } +065 +066 if (!ignoreCase) { +067 return false; +068 } +069 +070 // The same check as in String.regionMatches(): +071 if (Character.toUpperCase(c1) != Character.toUpperCase(c2) && +072 Character.toLowerCase(c1) != Character.toLowerCase(c2)) { +073 return false; +074 } +075 } +076 +077 return true; +078 } +079} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Converts hexadecimal Strings. The charset used for certain operation can be set, the default is set in
 * {@link #DEFAULT_CHARSET_NAME}
 *
 * This class is thread-safe.
 *
 * @since 1.1
 * @version $Id$
 */
public class Hex implements BinaryEncoder, BinaryDecoder {

    /**
     * Default charset is {@link Charsets#UTF_8}
     *
     * @since 1.7
     */
    public static final Charset DEFAULT_CHARSET = Charsets.UTF_8;

    /**
     * Default charset name is {@link CharEncoding#UTF_8}
     *
     * @since 1.4
     */
    public static final String DEFAULT_CHARSET_NAME = CharEncoding.UTF_8;

    /**
     * Lower-case hexadecimal digits, indexed by nibble value.
     */
    private static final char[] DIGITS_LOWER =
        {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

    /**
     * Upper-case hexadecimal digits, indexed by nibble value.
     */
    private static final char[] DIGITS_UPPER =
        {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    /**
     * Converts a String representing hexadecimal values into an array of bytes of those same values. The
     * returned array will be half the length of the passed String, as it takes two characters to represent any given
     * byte. An exception is thrown if the passed String has an odd number of elements.
     *
     * @param data
     *            A String containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied String.
     * @throws DecoderException
     *             Thrown if an odd number of characters or an illegal character is supplied
     * @since 1.11
     */
    public static byte[] decodeHex(final String data) throws DecoderException {
        return decodeHex(data.toCharArray());
    }

    /**
     * Converts an array of characters representing hexadecimal values into an array of bytes of those same values. The
     * returned array will be half the length of the passed array, as it takes two characters to represent any given
     * byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param data
     *            An array of characters containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied char array.
     * @throws DecoderException
     *             Thrown if an odd number of characters or an illegal character is supplied
     */
    public static byte[] decodeHex(final char[] data) throws DecoderException {
        final int len = data.length;

        if ((len & 0x01) != 0) {
            throw new DecoderException("Odd number of characters.");
        }

        final byte[] out = new byte[len >> 1];

        // Two characters form one byte: the first is the high nibble, the second the low nibble.
        for (int i = 0, j = 0; j < len; i++, j += 2) {
            final int high = toDigit(data[j], j);
            final int low = toDigit(data[j + 1], j + 1);
            out[i] = (byte) ((high << 4) | low);
        }

        return out;
    }

    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A char[] containing lower-case hexadecimal characters
     */
    public static char[] encodeHex(final byte[] data) {
        return encodeHex(data, true);
    }

    /**
     * Converts the remaining bytes of a byte buffer into an array of characters representing the hexadecimal values
     * of each byte in order. The returned array will be double the number of remaining bytes, as it takes two
     * characters to represent any given byte. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @return A char[] containing lower-case hexadecimal characters
     * @since 1.11
     */
    public static char[] encodeHex(final ByteBuffer data) {
        return encodeHex(data, true);
    }

    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A char[] containing hexadecimal characters in the selected case
     * @since 1.4
     */
    public static char[] encodeHex(final byte[] data, final boolean toLowerCase) {
        return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }

    /**
     * Converts the remaining bytes of a byte buffer into an array of characters representing the hexadecimal values
     * of each byte in order. The returned array will be double the number of remaining bytes, as it takes two
     * characters to represent any given byte. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A char[] containing hexadecimal characters in the selected case
     * @since 1.11
     */
    public static char[] encodeHex(final ByteBuffer data, final boolean toLowerCase) {
        return encodeHex(data, toLowerCase ? DIGITS_LOWER : DIGITS_UPPER);
    }

    /**
     * Converts an array of bytes into an array of characters representing the hexadecimal values of each byte in order.
     * The returned array will be double the length of the passed array, as it takes two characters to represent any
     * given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @param toDigits
     *            the output alphabet (must contain at least 16 chars)
     * @return A char[] containing the appropriate characters from the alphabet
     *         For best results, this should be either upper- or lower-case hex.
     * @since 1.4
     */
    protected static char[] encodeHex(final byte[] data, final char[] toDigits) {
        final int l = data.length;
        final char[] out = new char[l << 1];
        // Two characters form the hex value: high nibble first, then low nibble.
        for (int i = 0, j = 0; i < l; i++) {
            out[j++] = toDigits[(0xF0 & data[i]) >>> 4];
            out[j++] = toDigits[0x0F & data[i]];
        }
        return out;
    }

    /**
     * Converts the remaining bytes of a byte buffer into an array of characters representing the hexadecimal values
     * of each byte in order. The returned array will be double the number of remaining bytes, as it takes two
     * characters to represent any given byte. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @param toDigits
     *            the output alphabet (must be at least 16 characters)
     * @return A char[] containing the appropriate characters from the alphabet
     *         For best results, this should be either upper- or lower-case hex.
     * @since 1.11
     */
    protected static char[] encodeHex(final ByteBuffer data, final char[] toDigits) {
        return encodeHex(toByteArray(data), toDigits);
    }

    /**
     * Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned
     * String will be double the length of the passed array, as it takes two characters to represent any given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A String containing lower-case hexadecimal characters
     * @since 1.4
     */
    public static String encodeHexString(final byte[] data) {
        return new String(encodeHex(data));
    }

    /**
     * Converts an array of bytes into a String representing the hexadecimal values of each byte in order. The returned
     * String will be double the length of the passed array, as it takes two characters to represent any given byte.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A String containing hexadecimal characters in the selected case
     * @since 1.11
     */
    public static String encodeHexString(final byte[] data, final boolean toLowerCase) {
        return new String(encodeHex(data, toLowerCase));
    }

    /**
     * Converts the remaining bytes of a byte buffer into a String representing the hexadecimal values of each byte
     * in order. The returned String will be double the number of remaining bytes, as it takes two characters to
     * represent any given byte. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @return A String containing lower-case hexadecimal characters
     * @since 1.11
     */
    public static String encodeHexString(final ByteBuffer data) {
        return new String(encodeHex(data));
    }

    /**
     * Converts the remaining bytes of a byte buffer into a String representing the hexadecimal values of each byte
     * in order. The returned String will be double the number of remaining bytes, as it takes two characters to
     * represent any given byte. The buffer's position is not changed.
     *
     * @param data
     *            a byte buffer to convert to Hex characters
     * @param toLowerCase
     *            <code>true</code> converts to lowercase, <code>false</code> to uppercase
     * @return A String containing hexadecimal characters in the selected case
     * @since 1.11
     */
    public static String encodeHexString(final ByteBuffer data, final boolean toLowerCase) {
        return new String(encodeHex(data, toLowerCase));
    }

    /**
     * Copies the bytes between a buffer's position and its limit into a new array.
     * <p>
     * {@link ByteBuffer#array()} is deliberately not used: it exposes the whole backing array regardless of
     * position, limit and array offset, and it throws for direct and read-only buffers. Reading through a
     * duplicate leaves the caller's position untouched.
     * </p>
     *
     * @param buffer
     *            the buffer to read
     * @return a new array holding the buffer's remaining bytes
     */
    private static byte[] toByteArray(final ByteBuffer buffer) {
        final ByteBuffer view = buffer.duplicate();
        final byte[] bytes = new byte[view.remaining()];
        view.get(bytes);
        return bytes;
    }

    /**
     * Converts a hexadecimal character to an integer.
     *
     * @param ch
     *            A character to convert to an integer digit
     * @param index
     *            The index of the character in the source
     * @return An integer
     * @throws DecoderException
     *             Thrown if ch is an illegal hex character
     */
    protected static int toDigit(final char ch, final int index) throws DecoderException {
        final int digit = Character.digit(ch, 16);
        if (digit == -1) {
            throw new DecoderException("Illegal hexadecimal character " + ch + " at index " + index);
        }
        return digit;
    }

    /** Charset used to convert between hexadecimal characters and their byte form. */
    private final Charset charset;

    /**
     * Creates a new codec with the default charset {@link #DEFAULT_CHARSET}
     */
    public Hex() {
        // use default encoding
        this.charset = DEFAULT_CHARSET;
    }

    /**
     * Creates a new codec with the given Charset.
     *
     * @param charset
     *            the charset.
     * @since 1.7
     */
    public Hex(final Charset charset) {
        this.charset = charset;
    }

    /**
     * Creates a new codec with the given charset name.
     *
     * @param charsetName
     *            the charset name.
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     * @since 1.4
     * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
     */
    public Hex(final String charsetName) {
        this(Charset.forName(charsetName));
    }

    /**
     * Converts an array of character bytes representing hexadecimal values into an array of bytes of those same values.
     * The returned array will be half the length of the passed array, as it takes two characters to represent any given
     * byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param array
     *            An array of character bytes containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied byte array (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function
     * @see #decodeHex(char[])
     */
    @Override
    public byte[] decode(final byte[] array) throws DecoderException {
        return decodeHex(new String(array, getCharset()).toCharArray());
    }

    /**
     * Converts the remaining bytes of a buffer, which represent hexadecimal characters, into an array of bytes of
     * those same values. The returned array will be half the length of the decoded character sequence, as it takes
     * two characters to represent any given byte. An exception is thrown if the character sequence has an odd
     * number of elements. The buffer's position is not changed.
     *
     * @param buffer
     *            A buffer of character bytes containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied buffer (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function
     * @see #decodeHex(char[])
     * @since 1.11
     */
    public byte[] decode(final ByteBuffer buffer) throws DecoderException {
        return decodeHex(new String(toByteArray(buffer), getCharset()).toCharArray());
    }

    /**
     * Converts a String or an array of character bytes representing hexadecimal values into an array of bytes of those
     * same values. The returned array will be half the length of the passed String or array, as it takes two characters
     * to represent any given byte. An exception is thrown if the passed char array has an odd number of elements.
     *
     * @param object
     *            A String, ByteBuffer, byte[], or char[] containing hexadecimal digits
     * @return A byte array containing binary data decoded from the supplied input (representing characters).
     * @throws DecoderException
     *             Thrown if an odd number of characters is supplied to this function or the object is not a String,
     *             ByteBuffer, byte[] or char[]
     * @see #decodeHex(char[])
     */
    @Override
    public Object decode(final Object object) throws DecoderException {
        if (object instanceof String) {
            // Decode the characters directly rather than re-entering this method with a char[].
            return decodeHex(((String) object).toCharArray());
        } else if (object instanceof byte[]) {
            return decode((byte[]) object);
        } else if (object instanceof ByteBuffer) {
            return decode((ByteBuffer) object);
        } else {
            try {
                return decodeHex((char[]) object);
            } catch (final ClassCastException e) {
                throw new DecoderException(e.getMessage(), e);
            }
        }
    }

    /**
     * Converts an array of bytes into an array of bytes for the characters representing the hexadecimal values of each
     * byte in order. The returned array will be double the length of the passed array, as it takes two characters to
     * represent any given byte.
     * <p>
     * The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
     * {@link #getCharset()}.
     * </p>
     *
     * @param array
     *            a byte[] to convert to Hex characters
     * @return A byte[] containing the bytes of the lower-case hexadecimal characters
     * @since 1.7 No longer throws IllegalStateException if the charsetName is invalid.
     * @see #encodeHex(byte[])
     */
    @Override
    public byte[] encode(final byte[] array) {
        return encodeHexString(array).getBytes(this.getCharset());
    }

    /**
     * Converts the remaining bytes of a byte buffer into an array of bytes for the characters representing the
     * hexadecimal values of each byte in order. It takes two characters to represent any given byte. The buffer's
     * position is not changed.
     * <p>
     * The conversion from hexadecimal characters to the returned bytes is performed with the charset named by
     * {@link #getCharset()}.
     * </p>
     *
     * @param array
     *            a byte buffer to convert to Hex characters
     * @return A byte[] containing the bytes of the lower-case hexadecimal characters
     * @see #encodeHex(byte[])
     * @since 1.11
     */
    public byte[] encode(final ByteBuffer array) {
        return encodeHexString(array).getBytes(this.getCharset());
    }

    /**
     * Converts a String, a byte buffer or an array of bytes into an array of characters representing the hexadecimal
     * values of each byte in order. It takes two characters to represent any given byte.
     * <p>
     * A String is first converted to bytes with the charset named by {@link #getCharset()}. For a ByteBuffer only
     * the remaining bytes are encoded and its position is not changed.
     * </p>
     *
     * @param object
     *            a String, ByteBuffer, or byte[] to convert to Hex characters
     * @return A char[] containing lower-case hexadecimal characters
     * @throws EncoderException
     *             Thrown if the given object is not a String, ByteBuffer or byte[]
     * @see #encodeHex(byte[])
     */
    @Override
    public Object encode(final Object object) throws EncoderException {
        final byte[] byteArray;
        if (object instanceof String) {
            byteArray = ((String) object).getBytes(this.getCharset());
        } else if (object instanceof ByteBuffer) {
            byteArray = toByteArray((ByteBuffer) object);
        } else {
            try {
                byteArray = (byte[]) object;
            } catch (final ClassCastException e) {
                throw new EncoderException(e.getMessage(), e);
            }
        }
        return encodeHex(byteArray);
    }

    /**
     * Gets the charset.
     *
     * @return the charset.
     * @since 1.7
     */
    public Charset getCharset() {
        return this.charset;
    }

    /**
     * Gets the charset name.
     *
     * @return the charset name.
     * @since 1.4
     */
    public String getCharsetName() {
        return this.charset.name();
    }

    /**
     * Returns a string representation of the object, which includes the charset name.
     *
     * @return a string representation of the object.
     */
    @Override
    public String toString() {
        return super.toString() + "[charsetName=" + this.charset + "]";
    }
}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.binary; +019 +020import java.io.UnsupportedEncodingException; +021import java.nio.ByteBuffer; +022import java.nio.charset.Charset; +023 +024import org.apache.commons.codec.CharEncoding; +025import org.apache.commons.codec.Charsets; +026 +027/** +028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are +029 * specified in <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html"> +030 * Standard charsets</a>. +031 * +032 * <p>This class is immutable and thread-safe.</p> +033 * +034 * @see CharEncoding +035 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +036 * @version $Id$ +037 * @since 1.4 +038 */ +039public class StringUtils { +040 +041 /** +042 * <p> +043 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters. +044 * </p> +045 * +046 * <p> +047 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal. 
+048 * The comparison is case sensitive. +049 * </p> +050 * +051 * <pre> +052 * StringUtils.equals(null, null) = true +053 * StringUtils.equals(null, "abc") = false +054 * StringUtils.equals("abc", null) = false +055 * StringUtils.equals("abc", "abc") = true +056 * StringUtils.equals("abc", "ABC") = false +057 * </pre> +058 * +059 * <p> +060 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). +061 * </p> +062 * +063 * @see Object#equals(Object) +064 * @param cs1 +065 * the first CharSequence, may be <code>null</code> +066 * @param cs2 +067 * the second CharSequence, may be <code>null</code> +068 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code> +069 * @since 1.10 +070 */ +071 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { +072 if (cs1 == cs2) { +073 return true; +074 } +075 if (cs1 == null || cs2 == null) { +076 return false; +077 } +078 if (cs1 instanceof String && cs2 instanceof String) { +079 return cs1.equals(cs2); +080 } +081 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); +082 } +083 +084 /** +085 * Calls {@link String#getBytes(Charset)} +086 * +087 * @param string +088 * The string to encode (if null, return null). +089 * @param charset +090 * The {@link Charset} to encode the <code>String</code> +091 * @return the encoded bytes +092 */ +093 private static byte[] getBytes(final String string, final Charset charset) { +094 if (string == null) { +095 return null; +096 } +097 return string.getBytes(charset); +098 } +099 +100 /** +101 * Calls {@link String#getBytes(Charset)} +102 * +103 * @param string +104 * The string to encode (if null, return null). 
+105 * @param charset +106 * The {@link Charset} to encode the <code>String</code> +107 * @return the encoded bytes +108 */ +109 private static ByteBuffer getByteBuffer(final String string, final Charset charset) { +110 if (string == null) { +111 return null; +112 } +113 return ByteBuffer.wrap(string.getBytes(charset)); +114 } +115 +116 /** +117 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte +118 * array. +119 * +120 * @param string +121 * the String to encode, may be <code>null</code> +122 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +123 * @throws NullPointerException +124 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is +125 * required by the Java platform specification. +126 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +127 * @see #getBytesUnchecked(String, String) +128 * @since 1.11 +129 */ +130 public static ByteBuffer getByteBufferUtf8(final String string) { +131 return getByteBuffer(string, Charsets.UTF_8); +132 } +133 +134 /** +135 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new +136 * byte array. +137 * +138 * @param string +139 * the String to encode, may be <code>null</code> +140 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +141 * @throws NullPointerException +142 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is +143 * required by the Java platform specification. 
+144 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +145 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +146 * @see #getBytesUnchecked(String, String) +147 */ +148 public static byte[] getBytesIso8859_1(final String string) { +149 return getBytes(string, Charsets.ISO_8859_1); +150 } +151 +152 +153 /** +154 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte +155 * array. +156 * <p> +157 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which +158 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. +159 * </p> +160 * +161 * @param string +162 * the String to encode, may be <code>null</code> +163 * @param charsetName +164 * The name of a required {@link java.nio.charset.Charset} +165 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +166 * @throws IllegalStateException +167 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a +168 * required charset name. +169 * @see CharEncoding +170 * @see String#getBytes(String) +171 */ +172 public static byte[] getBytesUnchecked(final String string, final String charsetName) { +173 if (string == null) { +174 return null; +175 } +176 try { +177 return string.getBytes(charsetName); +178 } catch (final UnsupportedEncodingException e) { +179 throw StringUtils.newIllegalStateException(charsetName, e); +180 } +181 } +182 +183 /** +184 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte +185 * array. 
+186 * +187 * @param string +188 * the String to encode, may be <code>null</code> +189 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +190 * @throws NullPointerException +191 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is +192 * required by the Java platform specification. +193 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +194 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +195 * @see #getBytesUnchecked(String, String) +196 */ +197 public static byte[] getBytesUsAscii(final String string) { +198 return getBytes(string, Charsets.US_ASCII); +199 } +200 +201 /** +202 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte +203 * array. +204 * +205 * @param string +206 * the String to encode, may be <code>null</code> +207 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +208 * @throws NullPointerException +209 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is +210 * required by the Java platform specification. +211 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +212 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +213 * @see #getBytesUnchecked(String, String) +214 */ +215 public static byte[] getBytesUtf16(final String string) { +216 return getBytes(string, Charsets.UTF_16); +217 } +218 +219 /** +220 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte +221 * array. 
+222 * +223 * @param string +224 * the String to encode, may be <code>null</code> +225 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +226 * @throws NullPointerException +227 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is +228 * required by the Java platform specification. +229 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +230 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +231 * @see #getBytesUnchecked(String, String) +232 */ +233 public static byte[] getBytesUtf16Be(final String string) { +234 return getBytes(string, Charsets.UTF_16BE); +235 } +236 +237 /** +238 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte +239 * array. +240 * +241 * @param string +242 * the String to encode, may be <code>null</code> +243 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +244 * @throws NullPointerException +245 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is +246 * required by the Java platform specification. +247 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +248 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +249 * @see #getBytesUnchecked(String, String) +250 */ +251 public static byte[] getBytesUtf16Le(final String string) { +252 return getBytes(string, Charsets.UTF_16LE); +253 } +254 +255 /** +256 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte +257 * array. 
+258 * +259 * @param string +260 * the String to encode, may be <code>null</code> +261 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> +262 * @throws NullPointerException +263 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is +264 * required by the Java platform specification. +265 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +266 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +267 * @see #getBytesUnchecked(String, String) +268 */ +269 public static byte[] getBytesUtf8(final String string) { +270 return getBytes(string, Charsets.UTF_8); +271 } +272 +273 private static IllegalStateException newIllegalStateException(final String charsetName, +274 final UnsupportedEncodingException e) { +275 return new IllegalStateException(charsetName + ": " + e); +276 } +277 +278 /** +279 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. +280 * +281 * @param bytes +282 * The bytes to be decoded into characters +283 * @param charset +284 * The {@link Charset} to encode the <code>String</code>; not {@code null} +285 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, +286 * or <code>null</code> if the input byte array was <code>null</code>. +287 * @throws NullPointerException +288 * Thrown if charset is {@code null} +289 */ +290 private static String newString(final byte[] bytes, final Charset charset) { +291 return bytes == null ? null : new String(bytes, charset); +292 } +293 +294 /** +295 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. +296 * <p> +297 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which +298 * should never happen for a required charset name. 
Use this method when the encoding is required to be in the JRE. +299 * </p> +300 * +301 * @param bytes +302 * The bytes to be decoded into characters, may be <code>null</code> +303 * @param charsetName +304 * The name of a required {@link java.nio.charset.Charset} +305 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, +306 * or <code>null</code> if the input byte array was <code>null</code>. +307 * @throws IllegalStateException +308 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a +309 * required charset name. +310 * @see CharEncoding +311 * @see String#String(byte[], String) +312 */ +313 public static String newString(final byte[] bytes, final String charsetName) { +314 if (bytes == null) { +315 return null; +316 } +317 try { +318 return new String(bytes, charsetName); +319 } catch (final UnsupportedEncodingException e) { +320 throw StringUtils.newIllegalStateException(charsetName, e); +321 } +322 } +323 +324 /** +325 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. +326 * +327 * @param bytes +328 * The bytes to be decoded into characters, may be <code>null</code> +329 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or +330 * <code>null</code> if the input byte array was <code>null</code>. +331 * @throws NullPointerException +332 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is +333 * required by the Java platform specification. +334 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +335 */ +336 public static String newStringIso8859_1(final byte[] bytes) { +337 return newString(bytes, Charsets.ISO_8859_1); +338 } +339 +340 /** +341 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. 
+342 * +343 * @param bytes +344 * The bytes to be decoded into characters +345 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset, +346 * or <code>null</code> if the input byte array was <code>null</code>. +347 * @throws NullPointerException +348 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is +349 * required by the Java platform specification. +350 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +351 */ +352 public static String newStringUsAscii(final byte[] bytes) { +353 return newString(bytes, Charsets.US_ASCII); +354 } +355 +356 /** +357 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. +358 * +359 * @param bytes +360 * The bytes to be decoded into characters +361 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset +362 * or <code>null</code> if the input byte array was <code>null</code>. +363 * @throws NullPointerException +364 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is +365 * required by the Java platform specification. +366 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +367 */ +368 public static String newStringUtf16(final byte[] bytes) { +369 return newString(bytes, Charsets.UTF_16); +370 } +371 +372 /** +373 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. +374 * +375 * @param bytes +376 * The bytes to be decoded into characters +377 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset, +378 * or <code>null</code> if the input byte array was <code>null</code>. 
+379 * @throws NullPointerException +380 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is +381 * required by the Java platform specification. +382 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +383 */ +384 public static String newStringUtf16Be(final byte[] bytes) { +385 return newString(bytes, Charsets.UTF_16BE); +386 } +387 +388 /** +389 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. +390 * +391 * @param bytes +392 * The bytes to be decoded into characters +393 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset, +394 * or <code>null</code> if the input byte array was <code>null</code>. +395 * @throws NullPointerException +396 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is +397 * required by the Java platform specification. +398 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +399 */ +400 public static String newStringUtf16Le(final byte[] bytes) { +401 return newString(bytes, Charsets.UTF_16LE); +402 } +403 +404 /** +405 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. +406 * +407 * @param bytes +408 * The bytes to be decoded into characters +409 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset, +410 * or <code>null</code> if the input byte array was <code>null</code>. +411 * @throws NullPointerException +412 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is +413 * required by the Java platform specification. 
+414 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException +415 */ +416 public static String newStringUtf8(final byte[] bytes) { +417 return newString(bytes, Charsets.UTF_8); +418 } +419 +420} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.cli; +018 +019import java.io.File; +020import java.io.IOException; +021import java.nio.charset.Charset; +022import java.security.MessageDigest; +023import java.util.Arrays; +024import java.util.Locale; +025 +026import org.apache.commons.codec.binary.Hex; +027import org.apache.commons.codec.digest.DigestUtils; +028import org.apache.commons.codec.digest.MessageDigestAlgorithms; +029 +030/** +031 * A minimal command line to run digest over files, directories or a string +032 * +033 * @see #main(String[]) +034 * @since 1.11 +035 */ +036public class Digest { +037 +038 /** +039 * Runs the digest algorithm in {@code args[0]} on the file in {@code args[1]}. If there is no {@code args[1]}, use +040 * standard input. +041 * +042 * <p> +043 * The algorithm can also be {@code ALL} or {@code *} to output one line for each known algorithm. +044 * </p> +045 * +046 * @param args +047 * {@code args[0]} is one of {@link MessageDigestAlgorithms} name, +048 * {@link MessageDigest} name, {@code ALL}, or {@code *}. +049 * {@code args[1+]} is a FILE/DIRECTORY/String. 
+050 * @throws IOException if an error occurs +051 */ +052 public static void main(final String[] args) throws IOException { +053 new Digest(args).run(); +054 } +055 +056 private final String algorithm; +057 private final String[] args; +058 private final String[] inputs; +059 +060 private Digest(final String[] args) { +061 if (args == null) { +062 throw new IllegalArgumentException("args"); +063 } +064 if (args.length == 0) { +065 throw new IllegalArgumentException( +066 String.format("Usage: java %s [algorithm] [FILE|DIRECTORY|string] ...", Digest.class.getName())); +067 } +068 this.args = args; +069 algorithm = args[0]; +070 if (args.length <= 1) { +071 inputs = null; +072 } else { +073 inputs = new String[args.length -1]; +074 System.arraycopy(args, 1, inputs, 0, inputs.length); +075 } +076 } +077 +078 private void println(final String prefix, final byte[] digest) { +079 println(prefix, digest, null); +080 } +081 +082 private void println(final String prefix, final byte[] digest, final String fileName) { +083 // The standard appears to be to print +084 // hex, space, then either space or '*' followed by filename +085 // where '*' is used for binary files +086 // shasum(1) has a -b option which generates " *" separator +087 // we don't distinguish binary files at present +088 System.out.println(prefix + Hex.encodeHexString(digest) + (fileName != null ? 
" " + fileName : "")); +089 } +090 +091 private void run() throws IOException { +092 if (algorithm.equalsIgnoreCase("ALL") || algorithm.equals("*")) { +093 run(MessageDigestAlgorithms.values()); +094 return; +095 } +096 final MessageDigest messageDigest = DigestUtils.getDigest(algorithm, null); +097 if (messageDigest != null) { +098 run("", messageDigest); +099 } else { +100 run("", DigestUtils.getDigest(algorithm.toUpperCase(Locale.ROOT))); +101 } +102 } +103 +104 private void run(final String[] digestAlgorithms) throws IOException { +105 for (final String messageDigestAlgorithm : digestAlgorithms) { +106 if (DigestUtils.isAvailable(messageDigestAlgorithm)) { +107 run(messageDigestAlgorithm + " ", messageDigestAlgorithm); +108 } +109 } +110 } +111 +112 private void run(final String prefix, final MessageDigest messageDigest) throws IOException { +113 if (inputs == null) { +114 println(prefix, DigestUtils.digest(messageDigest, System.in)); +115 return; +116 } +117 for(final String source : inputs) { +118 final File file = new File(source); +119 if (file.isFile()) { +120 println(prefix, DigestUtils.digest(messageDigest, file), source); +121 } else if (file.isDirectory()) { +122 final File[] listFiles = file.listFiles(); +123 if (listFiles != null) { +124 run(prefix, messageDigest, listFiles); +125 } +126 } else { +127 // use the default charset for the command-line parameter +128 final byte[] bytes = source.getBytes(Charset.defaultCharset()); +129 println(prefix, DigestUtils.digest(messageDigest, bytes)); +130 } +131 } +132 } +133 +134 private void run(final String prefix, final MessageDigest messageDigest, final File[] files) throws IOException { +135 for (final File file : files) { +136 if (file.isFile()) { +137 println(prefix, DigestUtils.digest(messageDigest, file), file.getName()); +138 } +139 } +140 } +141 +142 private void run(final String prefix, final String messageDigestAlgorithm) throws IOException { +143 run(prefix, 
DigestUtils.getDigest(messageDigestAlgorithm)); +144 } +145 +146 @Override +147 public String toString() { +148 return String.format("%s %s", super.toString(), Arrays.toString(args)); +149 } +150} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.digest; +018 +019import java.security.SecureRandom; +020import java.util.concurrent.ThreadLocalRandom; +021 +022import org.apache.commons.codec.Charsets; +023 +024/** +025 * GNU libc crypt(3) compatible hash method. +026 * <p> +027 * See {@link #crypt(String, String)} for further details. +028 * <p> +029 * This class is immutable and thread-safe. +030 * +031 * @version $Id$ +032 * @since 1.7 +033 */ +034public class Crypt { +035 +036 /** +037 * Encrypts a password in a crypt(3) compatible way. +038 * <p> +039 * A random salt and the default algorithm (currently SHA-512) are used. See {@link #crypt(String, String)} for +040 * details. +041 * </p> +042 * <p> +043 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +044 * {@link SecureRandom} to generate your own salts and calling {@link #crypt(byte[], String)}. +045 * </p> +046 * +047 * @param keyBytes +048 * plaintext password +049 * @return hash value +050 * @throws IllegalArgumentException +051 * when a {@link java.security.NoSuchAlgorithmException} is caught. 
+052 */ +053 public static String crypt(final byte[] keyBytes) { +054 return crypt(keyBytes, null); +055 } +056 +057 /** +058 * Encrypts a password in a crypt(3) compatible way. +059 * <p> +060 * If no salt is provided, a random salt and the default algorithm (currently SHA-512) will be used. See +061 * {@link #crypt(String, String)} for details. +062 * +063 * @param keyBytes +064 * plaintext password +065 * @param salt +066 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +067 * you using {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to +068 * generate your own salts. +069 * @return hash value +070 * @throws IllegalArgumentException +071 * if the salt does not match the allowed pattern +072 * @throws IllegalArgumentException +073 * when a {@link java.security.NoSuchAlgorithmException} is caught. +074 */ +075 public static String crypt(final byte[] keyBytes, final String salt) { +076 if (salt == null) { +077 return Sha2Crypt.sha512Crypt(keyBytes); +078 } else if (salt.startsWith(Sha2Crypt.SHA512_PREFIX)) { +079 return Sha2Crypt.sha512Crypt(keyBytes, salt); +080 } else if (salt.startsWith(Sha2Crypt.SHA256_PREFIX)) { +081 return Sha2Crypt.sha256Crypt(keyBytes, salt); +082 } else if (salt.startsWith(Md5Crypt.MD5_PREFIX)) { +083 return Md5Crypt.md5Crypt(keyBytes, salt); +084 } else { +085 return UnixCrypt.crypt(keyBytes, salt); +086 } +087 } +088 +089 /** +090 * Calculates the digest using the strongest crypt(3) algorithm. +091 * <p> +092 * A random salt and the default algorithm (currently SHA-512) are used. +093 * </p> +094 * <p> +095 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +096 * {@link SecureRandom} to generate your own salts and calling {@link #crypt(String, String)}. 
+097 * </p> +098 * +099 * @see #crypt(String, String) +100 * @param key +101 * plaintext password +102 * @return hash value +103 * @throws IllegalArgumentException +104 * when a {@link java.security.NoSuchAlgorithmException} is caught. +105 */ +106 public static String crypt(final String key) { +107 return crypt(key, null); +108 } +109 +110 /** +111 * Encrypts a password in a crypt(3) compatible way. +112 * <p> +113 * The exact algorithm depends on the format of the salt string: +114 * <ul> +115 * <li>SHA-512 salts start with {@code $6$} and are up to 16 chars long. +116 * <li>SHA-256 salts start with {@code $5$} and are up to 16 chars long +117 * <li>MD5 salts start with {@code $1$} and are up to 8 chars long +118 * <li>DES, the traditional UnixCrypt algorithm is used with only 2 chars +119 * <li>Only the first 8 chars of the passwords are used in the DES algorithm! +120 * </ul> +121 * The magic strings {@code "$apr1$"} and {@code "$2a$"} are not recognized by this method as its output should be +122 * identical with that of the libc implementation. +123 * <p> +124 * The rest of the salt string is drawn from the set {@code [a-zA-Z0-9./]} and is cut at the maximum length of if a +125 * {@code "$"} sign is encountered. It is therefore valid to enter a complete hash value as salt to e.g. verify a +126 * password with: +127 * +128 * <pre> +129 * storedPwd.equals(crypt(enteredPwd, storedPwd)) +130 * </pre> +131 * <p> +132 * The resulting string starts with the marker string ({@code $n$}), where n is the same as the input salt. +133 * The salt is then appended, followed by a {@code "$"} sign. +134 * This is followed by the actual hash value. +135 * For DES the string only contains the salt and actual hash. 
+136 * The total length is dependent on the algorithm used: +137 * <ul> +138 * <li>SHA-512: 106 chars +139 * <li>SHA-256: 63 chars +140 * <li>MD5: 34 chars +141 * <li>DES: 13 chars +142 * </ul> +143 * <p> +144 * Example: +145 * +146 * <pre> +147 * crypt("secret", "$1$xxxx") => "$1$xxxx$aMkevjfEIpa35Bh3G4bAc." +148 * crypt("secret", "xx") => "xxWAum7tHdIUw" +149 * </pre> +150 * <p> +151 * This method comes in a variation that accepts a byte[] array to support input strings that are not encoded in +152 * UTF-8 but e.g. in ISO-8859-1 where equal characters result in different byte values. +153 * +154 * @see "The man page of the libc crypt (3) function." +155 * @param key +156 * plaintext password as entered by the used +157 * @param salt +158 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +159 * you using {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to +160 * generate your own salts. +161 * @return hash value, i.e. encrypted password including the salt string +162 * @throws IllegalArgumentException +163 * if the salt does not match the allowed pattern +164 * @throws IllegalArgumentException +165 * when a {@link java.security.NoSuchAlgorithmException} is caught. * +166 */ +167 public static String crypt(final String key, final String salt) { +168 return crypt(key.getBytes(Charsets.UTF_8), salt); +169 } +170} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.digest; +019 +020import java.io.BufferedInputStream; +021import java.io.File; +022import java.io.FileInputStream; +023import java.io.IOException; +024import java.io.InputStream; +025import java.nio.ByteBuffer; +026import java.security.MessageDigest; +027import java.security.NoSuchAlgorithmException; +028 +029import org.apache.commons.codec.binary.Hex; +030import org.apache.commons.codec.binary.StringUtils; +031 +032/** +033 * Operations to simplify common {@link java.security.MessageDigest} tasks. +034 * This class is immutable and thread-safe. +035 * However the MessageDigest instances it creates generally won't be. +036 * <p> +037 * The {@link MessageDigestAlgorithms} class provides constants for standard +038 * digest algorithms that can be used with the {@link #getDigest(String)} method +039 * and other methods that require the Digest algorithm name. +040 * <p> +041 * Note: the class has short-hand methods for all the algorithms present as standard in Java 6. +042 * This approach requires lots of methods for each algorithm, and quickly becomes unwieldy. 
+043 * The following code works with all algorithms: +044 * <pre> +045 * import static org.apache.commons.codec.digest.MessageDigestAlgorithms.SHA_224; +046 * ... +047 * byte [] digest = new DigestUtils(SHA_224).digest(dataToDigest); +048 * String hdigest = new DigestUtils(SHA_224).digestAsHex(new File("pom.xml")); +049 * </pre> +050 * @see MessageDigestAlgorithms +051 * @version $Id$ +052 */ +053public class DigestUtils { +054 +055 private static final int STREAM_BUFFER_LENGTH = 1024; +056 +057 /** +058 * Reads through a byte array and returns the digest for the data. Provided for symmetry with other methods. +059 * +060 * @param messageDigest +061 * The MessageDigest to use (e.g. MD5) +062 * @param data +063 * Data to digest +064 * @return the digest +065 * @since 1.11 +066 */ +067 public static byte[] digest(final MessageDigest messageDigest, final byte[] data) { +068 return messageDigest.digest(data); +069 } +070 +071 /** +072 * Reads through a ByteBuffer and returns the digest for the data +073 * +074 * @param messageDigest +075 * The MessageDigest to use (e.g. MD5) +076 * @param data +077 * Data to digest +078 * @return the digest +079 * +080 * @since 1.11 +081 */ +082 public static byte[] digest(final MessageDigest messageDigest, final ByteBuffer data) { +083 messageDigest.update(data); +084 return messageDigest.digest(); +085 } +086 +087 /** +088 * Reads through a File and returns the digest for the data +089 * +090 * @param messageDigest +091 * The MessageDigest to use (e.g. 
MD5) +092 * @param data +093 * Data to digest +094 * @return the digest +095 * @throws IOException +096 * On error reading from the stream +097 * @since 1.11 +098 */ +099 public static byte[] digest(final MessageDigest messageDigest, final File data) throws IOException { +100 return updateDigest(messageDigest, data).digest(); +101 } +102 +103 /** +104 * Reads through an InputStream and returns the digest for the data +105 * +106 * @param messageDigest +107 * The MessageDigest to use (e.g. MD5) +108 * @param data +109 * Data to digest +110 * @return the digest +111 * @throws IOException +112 * On error reading from the stream +113 * @since 1.11 (was private) +114 */ +115 public static byte[] digest(final MessageDigest messageDigest, final InputStream data) throws IOException { +116 return updateDigest(messageDigest, data).digest(); +117 } +118 +119 /** +120 * Returns a <code>MessageDigest</code> for the given <code>algorithm</code>. +121 * +122 * @param algorithm +123 * the name of the algorithm requested. See <a +124 * href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" +125 * >Appendix A in the Java Cryptography Architecture Reference Guide</a> for information about standard +126 * algorithm names. +127 * @return A digest instance. +128 * @see MessageDigest#getInstance(String) +129 * @throws IllegalArgumentException +130 * when a {@link NoSuchAlgorithmException} is caught. +131 */ +132 public static MessageDigest getDigest(final String algorithm) { +133 try { +134 return MessageDigest.getInstance(algorithm); +135 } catch (final NoSuchAlgorithmException e) { +136 throw new IllegalArgumentException(e); +137 } +138 } +139 +140 /** +141 * Returns a <code>MessageDigest</code> for the given <code>algorithm</code> or a default if there is a problem +142 * getting the algorithm. +143 * +144 * @param algorithm +145 * the name of the algorithm requested. 
See +146 * <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" > +147 * Appendix A in the Java Cryptography Architecture Reference Guide</a> for information about standard +148 * algorithm names. +149 * @param defaultMessageDigest +150 * The default MessageDigest. +151 * @return A digest instance. +152 * @see MessageDigest#getInstance(String) +153 * @throws IllegalArgumentException +154 * when a {@link NoSuchAlgorithmException} is caught. +155 * @since 1.11 +156 */ +157 public static MessageDigest getDigest(final String algorithm, final MessageDigest defaultMessageDigest) { +158 try { +159 return MessageDigest.getInstance(algorithm); +160 } catch (final Exception e) { +161 return defaultMessageDigest; +162 } +163 } +164 +165 /** +166 * Returns an MD2 MessageDigest. +167 * +168 * @return An MD2 digest instance. +169 * @throws IllegalArgumentException +170 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because MD2 is a +171 * built-in algorithm +172 * @see MessageDigestAlgorithms#MD2 +173 * @since 1.7 +174 */ +175 public static MessageDigest getMd2Digest() { +176 return getDigest(MessageDigestAlgorithms.MD2); +177 } +178 +179 /** +180 * Returns an MD5 MessageDigest. +181 * +182 * @return An MD5 digest instance. +183 * @throws IllegalArgumentException +184 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because MD5 is a +185 * built-in algorithm +186 * @see MessageDigestAlgorithms#MD5 +187 */ +188 public static MessageDigest getMd5Digest() { +189 return getDigest(MessageDigestAlgorithms.MD5); +190 } +191 +192 /** +193 * Returns an SHA-1 digest. +194 * +195 * @return An SHA-1 digest instance. 
+196 * @throws IllegalArgumentException +197 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because SHA-1 is a +198 * built-in algorithm +199 * @see MessageDigestAlgorithms#SHA_1 +200 * @since 1.7 +201 */ +202 public static MessageDigest getSha1Digest() { +203 return getDigest(MessageDigestAlgorithms.SHA_1); +204 } +205 +206 /** +207 * Returns an SHA-256 digest. +208 * +209 * @return An SHA-256 digest instance. +210 * @throws IllegalArgumentException +211 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because SHA-256 is a +212 * built-in algorithm +213 * @see MessageDigestAlgorithms#SHA_256 +214 */ +215 public static MessageDigest getSha256Digest() { +216 return getDigest(MessageDigestAlgorithms.SHA_256); +217 } +218 +219 /** +220 * Returns an SHA3-224 digest. +221 * +222 * @return An SHA3-224 digest instance. +223 * @throws IllegalArgumentException +224 * when a {@link NoSuchAlgorithmException} is caught, which should not happen on Oracle Java 9 and greater. +225 * @see MessageDigestAlgorithms#SHA3_224 +226 * @since 1.12 +227 */ +228 public static MessageDigest getSha3_224Digest() { +229 return getDigest(MessageDigestAlgorithms.SHA3_224); +230 } +231 +232 /** +233 * Returns an SHA3-256 digest. +234 * +235 * @return An SHA3-256 digest instance. +236 * @throws IllegalArgumentException +237 * when a {@link NoSuchAlgorithmException} is caught, which should not happen on Oracle Java 9 and greater. +238 * @see MessageDigestAlgorithms#SHA3_256 +239 * @since 1.12 +240 */ +241 public static MessageDigest getSha3_256Digest() { +242 return getDigest(MessageDigestAlgorithms.SHA3_256); +243 } +244 +245 /** +246 * Returns an SHA3-384 digest. +247 * +248 * @return An SHA3-384 digest instance. +249 * @throws IllegalArgumentException +250 * when a {@link NoSuchAlgorithmException} is caught, which should not happen on Oracle Java 9 and greater. 
+251 * @see MessageDigestAlgorithms#SHA3_384 +252 * @since 1.12 +253 */ +254 public static MessageDigest getSha3_384Digest() { +255 return getDigest(MessageDigestAlgorithms.SHA3_384); +256 } +257 +258 /** +259 * Returns an SHA3-512 digest. +260 * +261 * @return An SHA3-512 digest instance. +262 * @throws IllegalArgumentException +263 * when a {@link NoSuchAlgorithmException} is caught, which should not happen on Oracle Java 9 and greater. +264 * @see MessageDigestAlgorithms#SHA3_512 +265 * @since 1.12 +266 */ +267 public static MessageDigest getSha3_512Digest() { +268 return getDigest(MessageDigestAlgorithms.SHA3_512); +269 } +270 +271 /** +272 * Returns an SHA-384 digest. +273 * +274 * @return An SHA-384 digest instance. +275 * @throws IllegalArgumentException +276 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because SHA-384 is a +277 * built-in algorithm +278 * @see MessageDigestAlgorithms#SHA_384 +279 */ +280 public static MessageDigest getSha384Digest() { +281 return getDigest(MessageDigestAlgorithms.SHA_384); +282 } +283 +284 /** +285 * Returns an SHA-512 digest. +286 * +287 * @return An SHA-512 digest instance. +288 * @throws IllegalArgumentException +289 * when a {@link NoSuchAlgorithmException} is caught, which should never happen because SHA-512 is a +290 * built-in algorithm +291 * @see MessageDigestAlgorithms#SHA_512 +292 */ +293 public static MessageDigest getSha512Digest() { +294 return getDigest(MessageDigestAlgorithms.SHA_512); +295 } +296 +297 /** +298 * Returns an SHA-1 digest. +299 * +300 * @return An SHA-1 digest instance. +301 * @throws IllegalArgumentException +302 * when a {@link NoSuchAlgorithmException} is caught +303 * @deprecated (1.11) Use {@link #getSha1Digest()} +304 */ +305 @Deprecated +306 public static MessageDigest getShaDigest() { +307 return getSha1Digest(); +308 } +309 +310 /** +311 * Test whether the algorithm is supported. 
+312 * @param messageDigestAlgorithm the algorithm name +313 * @return {@code true} if the algorithm can be found +314 * @since 1.11 +315 */ +316 public static boolean isAvailable(final String messageDigestAlgorithm) { +317 return getDigest(messageDigestAlgorithm, null) != null; +318 } +319 +320 /** +321 * Calculates the MD2 digest and returns the value as a 16 element <code>byte[]</code>. +322 * +323 * @param data +324 * Data to digest +325 * @return MD2 digest +326 * @since 1.7 +327 */ +328 public static byte[] md2(final byte[] data) { +329 return getMd2Digest().digest(data); +330 } +331 +332 /** +333 * Calculates the MD2 digest and returns the value as a 16 element <code>byte[]</code>. +334 * +335 * @param data +336 * Data to digest +337 * @return MD2 digest +338 * @throws IOException +339 * On error reading from the stream +340 * @since 1.7 +341 */ +342 public static byte[] md2(final InputStream data) throws IOException { +343 return digest(getMd2Digest(), data); +344 } +345 +346 /** +347 * Calculates the MD2 digest and returns the value as a 16 element <code>byte[]</code>. +348 * +349 * @param data +350 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +351 * @return MD2 digest +352 * @since 1.7 +353 */ +354 public static byte[] md2(final String data) { +355 return md2(StringUtils.getBytesUtf8(data)); +356 } +357 +358 /** +359 * Calculates the MD2 digest and returns the value as a 32 character hex string. +360 * +361 * @param data +362 * Data to digest +363 * @return MD2 digest as a hex string +364 * @since 1.7 +365 */ +366 public static String md2Hex(final byte[] data) { +367 return Hex.encodeHexString(md2(data)); +368 } +369 +370 /** +371 * Calculates the MD2 digest and returns the value as a 32 character hex string. 
+372 * +373 * @param data +374 * Data to digest +375 * @return MD2 digest as a hex string +376 * @throws IOException +377 * On error reading from the stream +378 * @since 1.7 +379 */ +380 public static String md2Hex(final InputStream data) throws IOException { +381 return Hex.encodeHexString(md2(data)); +382 } +383 +384 /** +385 * Calculates the MD2 digest and returns the value as a 32 character hex string. +386 * +387 * @param data +388 * Data to digest +389 * @return MD2 digest as a hex string +390 * @since 1.7 +391 */ +392 public static String md2Hex(final String data) { +393 return Hex.encodeHexString(md2(data)); +394 } +395 +396 /** +397 * Calculates the MD5 digest and returns the value as a 16 element <code>byte[]</code>. +398 * +399 * @param data +400 * Data to digest +401 * @return MD5 digest +402 */ +403 public static byte[] md5(final byte[] data) { +404 return getMd5Digest().digest(data); +405 } +406 +407 /** +408 * Calculates the MD5 digest and returns the value as a 16 element <code>byte[]</code>. +409 * +410 * @param data +411 * Data to digest +412 * @return MD5 digest +413 * @throws IOException +414 * On error reading from the stream +415 * @since 1.4 +416 */ +417 public static byte[] md5(final InputStream data) throws IOException { +418 return digest(getMd5Digest(), data); +419 } +420 +421 /** +422 * Calculates the MD5 digest and returns the value as a 16 element <code>byte[]</code>. +423 * +424 * @param data +425 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +426 * @return MD5 digest +427 */ +428 public static byte[] md5(final String data) { +429 return md5(StringUtils.getBytesUtf8(data)); +430 } +431 +432 /** +433 * Calculates the MD5 digest and returns the value as a 32 character hex string. 
+434 * +435 * @param data +436 * Data to digest +437 * @return MD5 digest as a hex string +438 */ +439 public static String md5Hex(final byte[] data) { +440 return Hex.encodeHexString(md5(data)); +441 } +442 +443 /** +444 * Calculates the MD5 digest and returns the value as a 32 character hex string. +445 * +446 * @param data +447 * Data to digest +448 * @return MD5 digest as a hex string +449 * @throws IOException +450 * On error reading from the stream +451 * @since 1.4 +452 */ +453 public static String md5Hex(final InputStream data) throws IOException { +454 return Hex.encodeHexString(md5(data)); +455 } +456 +457 /** +458 * Calculates the MD5 digest and returns the value as a 32 character hex string. +459 * +460 * @param data +461 * Data to digest +462 * @return MD5 digest as a hex string +463 */ +464 public static String md5Hex(final String data) { +465 return Hex.encodeHexString(md5(data)); +466 } +467 +468 /** +469 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. +470 * +471 * @param data +472 * Data to digest +473 * @return SHA-1 digest +474 * @deprecated (1.11) Use {@link #sha1(byte[])} +475 */ +476 @Deprecated +477 public static byte[] sha(final byte[] data) { +478 return sha1(data); +479 } +480 +481 /** +482 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. +483 * +484 * @param data +485 * Data to digest +486 * @return SHA-1 digest +487 * @throws IOException +488 * On error reading from the stream +489 * @since 1.4 +490 * @deprecated (1.11) Use {@link #sha1(InputStream)} +491 */ +492 @Deprecated +493 public static byte[] sha(final InputStream data) throws IOException { +494 return sha1(data); +495 } +496 +497 /** +498 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. 
+499 * +500 * @param data +501 * Data to digest +502 * @return SHA-1 digest +503 * @deprecated (1.11) Use {@link #sha1(String)} +504 */ +505 @Deprecated +506 public static byte[] sha(final String data) { +507 return sha1(data); +508 } +509 +510 /** +511 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. +512 * +513 * @param data +514 * Data to digest +515 * @return SHA-1 digest +516 * @since 1.7 +517 */ +518 public static byte[] sha1(final byte[] data) { +519 return getSha1Digest().digest(data); +520 } +521 +522 /** +523 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. +524 * +525 * @param data +526 * Data to digest +527 * @return SHA-1 digest +528 * @throws IOException +529 * On error reading from the stream +530 * @since 1.7 +531 */ +532 public static byte[] sha1(final InputStream data) throws IOException { +533 return digest(getSha1Digest(), data); +534 } +535 +536 /** +537 * Calculates the SHA-1 digest and returns the value as a <code>byte[]</code>. +538 * +539 * @param data +540 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +541 * @return SHA-1 digest +542 */ +543 public static byte[] sha1(final String data) { +544 return sha1(StringUtils.getBytesUtf8(data)); +545 } +546 +547 /** +548 * Calculates the SHA-1 digest and returns the value as a hex string. +549 * +550 * @param data +551 * Data to digest +552 * @return SHA-1 digest as a hex string +553 * @since 1.7 +554 */ +555 public static String sha1Hex(final byte[] data) { +556 return Hex.encodeHexString(sha1(data)); +557 } +558 +559 /** +560 * Calculates the SHA-1 digest and returns the value as a hex string. 
+561 * +562 * @param data +563 * Data to digest +564 * @return SHA-1 digest as a hex string +565 * @throws IOException +566 * On error reading from the stream +567 * @since 1.7 +568 */ +569 public static String sha1Hex(final InputStream data) throws IOException { +570 return Hex.encodeHexString(sha1(data)); +571 } +572 +573 /** +574 * Calculates the SHA-1 digest and returns the value as a hex string. +575 * +576 * @param data +577 * Data to digest +578 * @return SHA-1 digest as a hex string +579 * @since 1.7 +580 */ +581 public static String sha1Hex(final String data) { +582 return Hex.encodeHexString(sha1(data)); +583 } +584 +585 /** +586 * Calculates the SHA-256 digest and returns the value as a <code>byte[]</code>. +587 * +588 * @param data +589 * Data to digest +590 * @return SHA-256 digest +591 * @since 1.4 +592 */ +593 public static byte[] sha256(final byte[] data) { +594 return getSha256Digest().digest(data); +595 } +596 +597 /** +598 * Calculates the SHA-256 digest and returns the value as a <code>byte[]</code>. +599 * +600 * @param data +601 * Data to digest +602 * @return SHA-256 digest +603 * @throws IOException +604 * On error reading from the stream +605 * @since 1.4 +606 */ +607 public static byte[] sha256(final InputStream data) throws IOException { +608 return digest(getSha256Digest(), data); +609 } +610 +611 /** +612 * Calculates the SHA-256 digest and returns the value as a <code>byte[]</code>. +613 * +614 * @param data +615 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +616 * @return SHA-256 digest +617 * @since 1.4 +618 */ +619 public static byte[] sha256(final String data) { +620 return sha256(StringUtils.getBytesUtf8(data)); +621 } +622 +623 /** +624 * Calculates the SHA-256 digest and returns the value as a hex string. 
+625 * +626 * @param data +627 * Data to digest +628 * @return SHA-256 digest as a hex string +629 * @since 1.4 +630 */ +631 public static String sha256Hex(final byte[] data) { +632 return Hex.encodeHexString(sha256(data)); +633 } +634 +635 /** +636 * Calculates the SHA-256 digest and returns the value as a hex string. +637 * +638 * @param data +639 * Data to digest +640 * @return SHA-256 digest as a hex string +641 * @throws IOException +642 * On error reading from the stream +643 * @since 1.4 +644 */ +645 public static String sha256Hex(final InputStream data) throws IOException { +646 return Hex.encodeHexString(sha256(data)); +647 } +648 +649 /** +650 * Calculates the SHA-256 digest and returns the value as a hex string. +651 * +652 * @param data +653 * Data to digest +654 * @return SHA-256 digest as a hex string +655 * @since 1.4 +656 */ +657 public static String sha256Hex(final String data) { +658 return Hex.encodeHexString(sha256(data)); +659 } +660 +661 /** +662 * Calculates the SHA3-224 digest and returns the value as a <code>byte[]</code>. +663 * +664 * @param data +665 * Data to digest +666 * @return SHA3-224 digest +667 * @since 1.12 +668 */ +669 public static byte[] sha3_224(final byte[] data) { +670 return getSha3_224Digest().digest(data); +671 } +672 +673 /** +674 * Calculates the SHA3-224 digest and returns the value as a <code>byte[]</code>. +675 * +676 * @param data +677 * Data to digest +678 * @return SHA3-224 digest +679 * @throws IOException +680 * On error reading from the stream +681 * @since 1.12 +682 */ +683 public static byte[] sha3_224(final InputStream data) throws IOException { +684 return digest(getSha3_224Digest(), data); +685 } +686 +687 /** +688 * Calculates the SHA3-224 digest and returns the value as a <code>byte[]</code>. 
+689 * +690 * @param data +691 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +692 * @return SHA3-224 digest +693 * @since 1.12 +694 */ +695 public static byte[] sha3_224(final String data) { +696 return sha3_224(StringUtils.getBytesUtf8(data)); +697 } +698 +699 /** +700 * Calculates the SHA3-224 digest and returns the value as a hex string. +701 * +702 * @param data +703 * Data to digest +704 * @return SHA3-224 digest as a hex string +705 * @since 1.12 +706 */ +707 public static String sha3_224Hex(final String data) { +708 return Hex.encodeHexString(sha3_224(data)); +709 } +710 +711 /** +712 * Calculates the SHA3-256 digest and returns the value as a <code>byte[]</code>. +713 * +714 * @param data +715 * Data to digest +716 * @return SHA3-256 digest +717 * @since 1.12 +718 */ +719 public static byte[] sha3_256(final byte[] data) { +720 return getSha3_256Digest().digest(data); +721 } +722 +723 /** +724 * Calculates the SHA3-256 digest and returns the value as a <code>byte[]</code>. +725 * +726 * @param data +727 * Data to digest +728 * @return SHA3-256 digest +729 * @throws IOException +730 * On error reading from the stream +731 * @since 1.12 +732 */ +733 public static byte[] sha3_256(final InputStream data) throws IOException { +734 return digest(getSha3_256Digest(), data); +735 } +736 +737 /** +738 * Calculates the SHA3-256 digest and returns the value as a <code>byte[]</code>. +739 * +740 * @param data +741 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +742 * @return SHA3-256 digest +743 * @since 1.12 +744 */ +745 public static byte[] sha3_256(final String data) { +746 return sha3_256(StringUtils.getBytesUtf8(data)); +747 } +748 +749 /** +750 * Calculates the SHA3-256 digest and returns the value as a hex string. 
+751 * +752 * @param data +753 * Data to digest +754 * @return SHA3-256 digest as a hex string +755 * @since 1.12 +756 */ +757 public static String sha3_256Hex(final String data) { +758 return Hex.encodeHexString(sha3_256(data)); +759 } +760 +761 /** +762 * Calculates the SHA3-384 digest and returns the value as a <code>byte[]</code>. +763 * +764 * @param data +765 * Data to digest +766 * @return SHA3-384 digest +767 * @since 1.12 +768 */ +769 public static byte[] sha3_384(final byte[] data) { +770 return getSha3_384Digest().digest(data); +771 } +772 +773 /** +774 * Calculates the SHA3-384 digest and returns the value as a <code>byte[]</code>. +775 * +776 * @param data +777 * Data to digest +778 * @return SHA3-384 digest +779 * @throws IOException +780 * On error reading from the stream +781 * @since 1.12 +782 */ +783 public static byte[] sha3_384(final InputStream data) throws IOException { +784 return digest(getSha3_384Digest(), data); +785 } +786 +787 /** +788 * Calculates the SHA3-384 digest and returns the value as a <code>byte[]</code>. +789 * +790 * @param data +791 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +792 * @return SHA3-384 digest +793 * @since 1.12 +794 */ +795 public static byte[] sha3_384(final String data) { +796 return sha3_384(StringUtils.getBytesUtf8(data)); +797 } +798 +799 /** +800 * Calculates the SHA3-384 digest and returns the value as a hex string. +801 * +802 * @param data +803 * Data to digest +804 * @return SHA3-384 digest as a hex string +805 * @since 1.12 +806 */ +807 public static String sha3_384Hex(final String data) { +808 return Hex.encodeHexString(sha3_384(data)); +809 } +810 +811 /** +812 * Calculates the SHA3-512 digest and returns the value as a <code>byte[]</code>. 
+813 * +814 * @param data +815 * Data to digest +816 * @return SHA3-512 digest +817 * @since 1.12 +818 */ +819 public static byte[] sha3_512(final byte[] data) { +820 return getSha3_512Digest().digest(data); +821 } +822 +823 /** +824 * Calculates the SHA3-512 digest and returns the value as a <code>byte[]</code>. +825 * +826 * @param data +827 * Data to digest +828 * @return SHA3-512 digest +829 * @throws IOException +830 * On error reading from the stream +831 * @since 1.12 +832 */ +833 public static byte[] sha3_512(final InputStream data) throws IOException { +834 return digest(getSha3_512Digest(), data); +835 } +836 +837 /** +838 * Calculates the SHA3-512 digest and returns the value as a <code>byte[]</code>. +839 * +840 * @param data +841 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +842 * @return SHA3-512 digest +843 * @since 1.12 +844 */ +845 public static byte[] sha3_512(final String data) { +846 return sha3_512(StringUtils.getBytesUtf8(data)); +847 } +848 +849 /** +850 * Calculates the SHA3-512 digest and returns the value as a hex string. +851 * +852 * @param data +853 * Data to digest +854 * @return SHA3-512 digest as a hex string +855 * @since 1.12 +856 */ +857 public static String sha3_512Hex(final String data) { +858 return Hex.encodeHexString(sha3_512(data)); +859 } +860 +861 /** +862 * Calculates the SHA-384 digest and returns the value as a <code>byte[]</code>. +863 * +864 * @param data +865 * Data to digest +866 * @return SHA-384 digest +867 * @since 1.4 +868 */ +869 public static byte[] sha384(final byte[] data) { +870 return getSha384Digest().digest(data); +871 } +872 +873 /** +874 * Calculates the SHA-384 digest and returns the value as a <code>byte[]</code>. 
+875 * +876 * @param data +877 * Data to digest +878 * @return SHA-384 digest +879 * @throws IOException +880 * On error reading from the stream +881 * @since 1.4 +882 */ +883 public static byte[] sha384(final InputStream data) throws IOException { +884 return digest(getSha384Digest(), data); +885 } +886 +887 /** +888 * Calculates the SHA-384 digest and returns the value as a <code>byte[]</code>. +889 * +890 * @param data +891 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +892 * @return SHA-384 digest +893 * @since 1.4 +894 */ +895 public static byte[] sha384(final String data) { +896 return sha384(StringUtils.getBytesUtf8(data)); +897 } +898 +899 /** +900 * Calculates the SHA-384 digest and returns the value as a hex string. +901 * +902 * @param data +903 * Data to digest +904 * @return SHA-384 digest as a hex string +905 * @since 1.4 +906 */ +907 public static String sha384Hex(final byte[] data) { +908 return Hex.encodeHexString(sha384(data)); +909 } +910 +911 /** +912 * Calculates the SHA-384 digest and returns the value as a hex string. +913 * +914 * @param data +915 * Data to digest +916 * @return SHA-384 digest as a hex string +917 * @throws IOException +918 * On error reading from the stream +919 * @since 1.4 +920 */ +921 public static String sha384Hex(final InputStream data) throws IOException { +922 return Hex.encodeHexString(sha384(data)); +923 } +924 +925 /** +926 * Calculates the SHA-384 digest and returns the value as a hex string. +927 * +928 * @param data +929 * Data to digest +930 * @return SHA-384 digest as a hex string +931 * @since 1.4 +932 */ +933 public static String sha384Hex(final String data) { +934 return Hex.encodeHexString(sha384(data)); +935 } +936 +937 /** +938 * Calculates the SHA-512 digest and returns the value as a <code>byte[]</code>. 
+939 * +940 * @param data +941 * Data to digest +942 * @return SHA-512 digest +943 * @since 1.4 +944 */ +945 public static byte[] sha512(final byte[] data) { +946 return getSha512Digest().digest(data); +947 } +948 +949 /** +950 * Calculates the SHA-512 digest and returns the value as a <code>byte[]</code>. +951 * +952 * @param data +953 * Data to digest +954 * @return SHA-512 digest +955 * @throws IOException +956 * On error reading from the stream +957 * @since 1.4 +958 */ +959 public static byte[] sha512(final InputStream data) throws IOException { +960 return digest(getSha512Digest(), data); +961 } +962 +963 /** +964 * Calculates the SHA-512 digest and returns the value as a <code>byte[]</code>. +965 * +966 * @param data +967 * Data to digest; converted to bytes using {@link StringUtils#getBytesUtf8(String)} +968 * @return SHA-512 digest +969 * @since 1.4 +970 */ +971 public static byte[] sha512(final String data) { +972 return sha512(StringUtils.getBytesUtf8(data)); +973 } +974 +975 /** +976 * Calculates the SHA-512 digest and returns the value as a hex string. +977 * +978 * @param data +979 * Data to digest +980 * @return SHA-512 digest as a hex string +981 * @since 1.4 +982 */ +983 public static String sha512Hex(final byte[] data) { +984 return Hex.encodeHexString(sha512(data)); +985 } +986 +987 /** +988 * Calculates the SHA3-224 digest and returns the value as a hex string. +989 * +990 * @param data +991 * Data to digest +992 * @return SHA3-224 digest as a hex string +993 * @since 1.12 +994 */ +995 public static String sha3_224Hex(final byte[] data) { +996 return Hex.encodeHexString(sha3_224(data)); +997 } +998 +999 /** +1000 * Calculates the SHA3-256 digest and returns the value as a hex string. 
+1001 * +1002 * @param data +1003 * Data to digest +1004 * @return SHA3-256 digest as a hex string +1005 * @since 1.12 +1006 */ +1007 public static String sha3_256Hex(final byte[] data) { +1008 return Hex.encodeHexString(sha3_256(data)); +1009 } +1010 +1011 /** +1012 * Calculates the SHA3-384 digest and returns the value as a hex string. +1013 * +1014 * @param data +1015 * Data to digest +1016 * @return SHA3-384 digest as a hex string +1017 * @since 1.12 +1018 */ +1019 public static String sha3_384Hex(final byte[] data) { +1020 return Hex.encodeHexString(sha3_384(data)); +1021 } +1022 +1023 /** +1024 * Calculates the SHA3-512 digest and returns the value as a hex string. +1025 * +1026 * @param data +1027 * Data to digest +1028 * @return SHA3-512 digest as a hex string +1029 * @since 1.12 +1030 */ +1031 public static String sha3_512Hex(final byte[] data) { +1032 return Hex.encodeHexString(sha3_512(data)); +1033 } +1034 +1035 /** +1036 * Calculates the SHA-512 digest and returns the value as a hex string. +1037 * +1038 * @param data +1039 * Data to digest +1040 * @return SHA-512 digest as a hex string +1041 * @throws IOException +1042 * On error reading from the stream +1043 * @since 1.4 +1044 */ +1045 public static String sha512Hex(final InputStream data) throws IOException { +1046 return Hex.encodeHexString(sha512(data)); +1047 } +1048 +1049 /** +1050 * Calculates the SHA3-224 digest and returns the value as a hex string. +1051 * +1052 * @param data +1053 * Data to digest +1054 * @return SHA3-224 digest as a hex string +1055 * @throws IOException +1056 * On error reading from the stream +1057 * @since 1.12 +1058 */ +1059 public static String sha3_224Hex(final InputStream data) throws IOException { +1060 return Hex.encodeHexString(sha3_224(data)); +1061 } +1062 +1063 /** +1064 * Calculates the SHA3-256 digest and returns the value as a hex string. 
+1065 * +1066 * @param data +1067 * Data to digest +1068 * @return SHA3-256 digest as a hex string +1069 * @throws IOException +1070 * On error reading from the stream +1071 * @since 1.12 +1072 */ +1073 public static String sha3_256Hex(final InputStream data) throws IOException { +1074 return Hex.encodeHexString(sha3_256(data)); +1075 } +1076 +1077 /** +1078 * Calculates the SHA3-384 digest and returns the value as a hex string. +1079 * +1080 * @param data +1081 * Data to digest +1082 * @return SHA3-384 digest as a hex string +1083 * @throws IOException +1084 * On error reading from the stream +1085 * @since 1.12 +1086 */ +1087 public static String sha3_384Hex(final InputStream data) throws IOException { +1088 return Hex.encodeHexString(sha3_384(data)); +1089 } +1090 +1091 /** +1092 * Calculates the SHA3-512 digest and returns the value as a hex string. +1093 * +1094 * @param data +1095 * Data to digest +1096 * @return SHA3-512 digest as a hex string +1097 * @throws IOException +1098 * On error reading from the stream +1099 * @since 1.12 +1100 */ +1101 public static String sha3_512Hex(final InputStream data) throws IOException { +1102 return Hex.encodeHexString(sha3_512(data)); +1103 } +1104 +1105 /** +1106 * Calculates the SHA-512 digest and returns the value as a hex string. +1107 * +1108 * @param data +1109 * Data to digest +1110 * @return SHA-512 digest as a hex string +1111 * @since 1.4 +1112 */ +1113 public static String sha512Hex(final String data) { +1114 return Hex.encodeHexString(sha512(data)); +1115 } +1116 +1117 /** +1118 * Calculates the SHA-1 digest and returns the value as a hex string. +1119 * +1120 * @param data +1121 * Data to digest +1122 * @return SHA-1 digest as a hex string +1123 * @deprecated (1.11) Use {@link #sha1Hex(byte[])} +1124 */ +1125 @Deprecated +1126 public static String shaHex(final byte[] data) { +1127 return sha1Hex(data); +1128 } +1129 +1130 /** +1131 * Calculates the SHA-1 digest and returns the value as a hex string. 
+1132 * +1133 * @param data +1134 * Data to digest +1135 * @return SHA-1 digest as a hex string +1136 * @throws IOException +1137 * On error reading from the stream +1138 * @since 1.4 +1139 * @deprecated (1.11) Use {@link #sha1Hex(InputStream)} +1140 */ +1141 @Deprecated +1142 public static String shaHex(final InputStream data) throws IOException { +1143 return sha1Hex(data); +1144 } +1145 +1146 /** +1147 * Calculates the SHA-1 digest and returns the value as a hex string. +1148 * +1149 * @param data +1150 * Data to digest +1151 * @return SHA-1 digest as a hex string +1152 * @deprecated (1.11) Use {@link #sha1Hex(String)} +1153 */ +1154 @Deprecated +1155 public static String shaHex(final String data) { +1156 return sha1Hex(data); +1157 } +1158 +1159 /** +1160 * Updates the given {@link MessageDigest}. +1161 * +1162 * @param messageDigest +1163 * the {@link MessageDigest} to update +1164 * @param valueToDigest +1165 * the value to update the {@link MessageDigest} with +1166 * @return the updated {@link MessageDigest} +1167 * @since 1.7 +1168 */ +1169 public static MessageDigest updateDigest(final MessageDigest messageDigest, final byte[] valueToDigest) { +1170 messageDigest.update(valueToDigest); +1171 return messageDigest; +1172 } +1173 +1174 /** +1175 * Updates the given {@link MessageDigest}. +1176 * +1177 * @param messageDigest +1178 * the {@link MessageDigest} to update +1179 * @param valueToDigest +1180 * the value to update the {@link MessageDigest} with +1181 * @return the updated {@link MessageDigest} +1182 * @since 1.11 +1183 */ +1184 public static MessageDigest updateDigest(final MessageDigest messageDigest, final ByteBuffer valueToDigest) { +1185 messageDigest.update(valueToDigest); +1186 return messageDigest; +1187 } +1188 +1189 /** +1190 * Reads through a File and updates the digest for the data +1191 * +1192 * @param digest +1193 * The MessageDigest to use (e.g. 
MD5) +1194 * @param data +1195 * Data to digest +1196 * @return the digest +1197 * @throws IOException +1198 * On error reading from the stream +1199 * @since 1.11 +1200 */ +1201 public static MessageDigest updateDigest(final MessageDigest digest, final File data) throws IOException { +1202 try (final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(data))) { +1203 return updateDigest(digest, stream); +1204 } +1205 } +1206 +1207 /** +1208 * Reads through an InputStream and updates the digest for the data +1209 * +1210 * @param digest +1211 * The MessageDigest to use (e.g. MD5) +1212 * @param data +1213 * Data to digest +1214 * @return the digest +1215 * @throws IOException +1216 * On error reading from the stream +1217 * @since 1.8 +1218 */ +1219 public static MessageDigest updateDigest(final MessageDigest digest, final InputStream data) throws IOException { +1220 final byte[] buffer = new byte[STREAM_BUFFER_LENGTH]; +1221 int read = data.read(buffer, 0, STREAM_BUFFER_LENGTH); +1222 +1223 while (read > -1) { +1224 digest.update(buffer, 0, read); +1225 read = data.read(buffer, 0, STREAM_BUFFER_LENGTH); +1226 } +1227 +1228 return digest; +1229 } +1230 +1231 /** +1232 * Updates the given {@link MessageDigest} from a String (converted to bytes using UTF-8). 
+1233 * <p> +1234 * To update the digest using a different charset for the conversion, +1235 * convert the String to a byte array using +1236 * {@link String#getBytes(java.nio.charset.Charset)} and pass that +1237 * to the {@link DigestUtils#updateDigest(MessageDigest, byte[])} method +1238 * +1239 * @param messageDigest +1240 * the {@link MessageDigest} to update +1241 * @param valueToDigest +1242 * the value to update the {@link MessageDigest} with; +1243 * converted to bytes using {@link StringUtils#getBytesUtf8(String)} +1244 * @return the updated {@link MessageDigest} +1245 * @since 1.7 +1246 */ +1247 public static MessageDigest updateDigest(final MessageDigest messageDigest, final String valueToDigest) { +1248 messageDigest.update(StringUtils.getBytesUtf8(valueToDigest)); +1249 return messageDigest; +1250 } +1251 +1252 private final MessageDigest messageDigest; +1253 +1254 /** +1255 * Preserves binary compatibity only. +1256 * As for previous versions does not provide useful behaviour +1257 * @deprecated since 1.11; only useful to preserve binary compatibility +1258 */ +1259 @Deprecated +1260 public DigestUtils() { +1261 this.messageDigest = null; +1262 } +1263 +1264 /** +1265 * Creates an instance using the provided {@link MessageDigest} parameter. +1266 * +1267 * This can then be used to create digests using methods such as +1268 * {@link #digest(byte[])} and {@link #digestAsHex(File)}. +1269 * +1270 * @param digest the {@link MessageDigest} to use +1271 * @since 1.11 +1272 */ +1273 public DigestUtils(final MessageDigest digest) { +1274 this.messageDigest = digest; +1275 } +1276 +1277 /** +1278 * Creates an instance using the provided {@link MessageDigest} parameter. +1279 * +1280 * This can then be used to create digests using methods such as +1281 * {@link #digest(byte[])} and {@link #digestAsHex(File)}. 
+1282 * +1283 * @param name the name of the {@link MessageDigest} to use +1284 * @see #getDigest(String) +1285 * @throws IllegalArgumentException +1286 * when a {@link NoSuchAlgorithmException} is caught. +1287 * @since 1.11 +1288 */ +1289 public DigestUtils(final String name) { +1290 this(getDigest(name)); +1291 } +1292 +1293 /** +1294 * Reads through a byte array and returns the digest for the data. +1295 * +1296 * @param data +1297 * Data to digest +1298 * @return the digest +1299 * @since 1.11 +1300 */ +1301 public byte[] digest(final byte[] data) { +1302 return updateDigest(messageDigest, data).digest(); +1303 } +1304 +1305 /** +1306 * Reads through a ByteBuffer and returns the digest for the data +1307 * +1308 * @param data +1309 * Data to digest +1310 * @return the digest +1311 * +1312 * @since 1.11 +1313 */ +1314 public byte[] digest(final ByteBuffer data) { +1315 return updateDigest(messageDigest, data).digest(); +1316 } +1317 +1318 /** +1319 * Reads through a File and returns the digest for the data +1320 * +1321 * @param data +1322 * Data to digest +1323 * @return the digest +1324 * @throws IOException +1325 * On error reading from the stream +1326 * @since 1.11 +1327 */ +1328 public byte[] digest(final File data) throws IOException { +1329 return updateDigest(messageDigest, data).digest(); +1330 } +1331 +1332 /** +1333 * Reads through an InputStream and returns the digest for the data +1334 * +1335 * @param data +1336 * Data to digest +1337 * @return the digest +1338 * @throws IOException +1339 * On error reading from the stream +1340 * @since 1.11 +1341 */ +1342 public byte[] digest(final InputStream data) throws IOException { +1343 return updateDigest(messageDigest, data).digest(); +1344 } +1345 +1346 /** +1347 * Reads through a byte array and returns the digest for the data. 
+1348 * +1349 * @param data +1350 * Data to digest treated as UTF-8 string +1351 * @return the digest +1352 * @since 1.11 +1353 */ +1354 public byte[] digest(final String data) { +1355 return updateDigest(messageDigest, data).digest(); +1356 } +1357 +1358 /** +1359 * Reads through a byte array and returns the digest for the data. +1360 * +1361 * @param data +1362 * Data to digest +1363 * @return the digest as a hex string +1364 * @since 1.11 +1365 */ +1366 public String digestAsHex(final byte[] data) { +1367 return Hex.encodeHexString(digest(data)); +1368 } +1369 +1370 /** +1371 * Reads through a ByteBuffer and returns the digest for the data +1372 * +1373 * @param data +1374 * Data to digest +1375 * @return the digest as a hex string +1376 * +1377 * @since 1.11 +1378 */ +1379 public String digestAsHex(final ByteBuffer data) { +1380 return Hex.encodeHexString(digest(data)); +1381 } +1382 +1383 /** +1384 * Reads through a File and returns the digest for the data +1385 * +1386 * @param data +1387 * Data to digest +1388 * @return the digest as a hex string +1389 * @throws IOException +1390 * On error reading from the stream +1391 * @since 1.11 +1392 */ +1393 public String digestAsHex(final File data) throws IOException { +1394 return Hex.encodeHexString(digest(data)); +1395 } +1396 +1397 /** +1398 * Reads through an InputStream and returns the digest for the data +1399 * +1400 * @param data +1401 * Data to digest +1402 * @return the digest as a hex string +1403 * @throws IOException +1404 * On error reading from the stream +1405 * @since 1.11 +1406 */ +1407 public String digestAsHex(final InputStream data) throws IOException { +1408 return Hex.encodeHexString(digest(data)); +1409 } +1410 +1411 /** +1412 * Reads through a byte array and returns the digest for the data. 
+1413 * +1414 * @param data +1415 * Data to digest treated as UTF-8 string +1416 * @return the digest as a hex string +1417 * @since 1.11 +1418 */ +1419 public String digestAsHex(final String data) { +1420 return Hex.encodeHexString(digest(data)); +1421 } +1422 +1423 /** +1424 * Returns the message digest instance. +1425 * @return the message digest instance +1426 * @since 1.11 +1427 */ +1428 public MessageDigest getMessageDigest() { +1429 return messageDigest; +1430 } +1431 +1432} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.digest; +019 +020/** +021 * Standard {@link HmacUtils} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name +022 * Documentation</cite>. 
+023 * +024 * <p> +025 * <strong>Note: Not all JCE implementations support all the algorithms in this enum.</strong> +026 * </p> +027 * +028 * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java +029 * 6 Cryptography Architecture Sun Providers Documentation</a> +030 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java +031 * 7 Cryptography Architecture Sun Providers Documentation</a> +032 * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> Java +033 * 8 Cryptography Architecture Sun Providers Documentation</a> +034 * @see <a href= +035 * "http://docs.oracle.com/javase/9/security/oracleproviders.htm#JSSEC-GUID-A47B1249-593C-4C38-A0D0-68FA7681E0A7"> +036 * Java 9 Cryptography Architecture Sun Providers Documentation</a> +037 * @since 1.10 +038 * @version $Id$ +039 */ +040public enum HmacAlgorithms { +041 +042 /** +043 * The HmacMD5 Message Authentication Code (MAC) algorithm specified in RFC 2104 and RFC 1321. +044 * <p> +045 * Every implementation of the Java platform is required to support this standard MAC algorithm. +046 * </p> +047 */ +048 HMAC_MD5("HmacMD5"), +049 +050 /** +051 * The HmacSHA1 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2. +052 * <p> +053 * Every implementation of the Java platform is required to support this standard MAC algorithm. +054 * </p> +055 */ +056 HMAC_SHA_1("HmacSHA1"), +057 +058 /** +059 * The HmacSHA224 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2. +060 * <p> +061 * Every implementation of the Java 8+ platform is required to support this standard MAC algorithm. +062 * </p> +063 * @since 1.11 +064 */ +065 HMAC_SHA_224("HmacSHA224"), +066 +067 /** +068 * The HmacSHA256 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2. 
+069 * <p> +070 * Every implementation of the Java platform is required to support this standard MAC algorithm. +071 * </p> +072 */ +073 HMAC_SHA_256("HmacSHA256"), +074 +075 /** +076 * The HmacSHA384 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2. +077 * <p> +078 * This MAC algorithm is <em>optional</em>; not all implementations support it. +079 * </p> +080 */ +081 HMAC_SHA_384("HmacSHA384"), +082 +083 /** +084 * The HmacSHA512 Message Authentication Code (MAC) algorithm specified in RFC 2104 and FIPS PUB 180-2. +085 * <p> +086 * This MAC algorithm is <em>optional</em>; not all implementations support it. +087 * </p> +088 */ +089 HMAC_SHA_512("HmacSHA512"); +090 +091 private final String name; +092 +093 private HmacAlgorithms(final String algorithm) { +094 this.name = algorithm; +095 } +096 +097 /** +098 * Gets the algorithm name. +099 * +100 * @return the algorithm name. +101 * @since 1.11 +102 */ +103 public String getName() { +104 return name; +105 } +106 +107 /** +108 * The algorithm name +109 * +110 * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> +111 * Java 6 Cryptography Architecture Sun Providers Documentation</a> +112 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> +113 * Java 7 Cryptography Architecture Sun Providers Documentation</a> +114 * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/SunProviders.html#SunJCEProvider"> +115 * Java 8 Cryptography Architecture Sun Providers Documentation</a> +116 * @see <a href= +117 * "http://docs.oracle.com/javase/9/security/oracleproviders.htm#JSSEC-GUID-A47B1249-593C-4C38-A0D0-68FA7681E0A7"> +118 * Java 9 Cryptography Architecture Sun Providers Documentation</a> +119 * @return The algorithm name ("HmacSHA512" for example) +120 */ +121 @Override +122 public String toString() { +123 return name; +124 } +125 +126} + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.digest; +019 +020import java.io.BufferedInputStream; +021import java.io.File; +022import java.io.FileInputStream; +023import java.io.IOException; +024import java.io.InputStream; +025import java.nio.ByteBuffer; +026import java.security.InvalidKeyException; +027import java.security.Key; +028import java.security.NoSuchAlgorithmException; +029 +030import javax.crypto.Mac; +031import javax.crypto.spec.SecretKeySpec; +032 +033import org.apache.commons.codec.binary.Hex; +034import org.apache.commons.codec.binary.StringUtils; +035 +036/** +037 * Simplifies common {@link javax.crypto.Mac} tasks. This class is immutable and thread-safe. +038 * However the Mac may not be. +039 * <p> +040 * <strong>Note: Not all JCE implementations support all algorithms. If not supported, an IllegalArgumentException is +041 * thrown.</strong> +042 * <p> +043 * Sample usage: +044 * <pre> +045 * import static HmacAlgorithms.*; +046 * byte[] key = {1,2,3,4}; // don't use this actual key! 
+047 * String valueToDigest = "The quick brown fox jumps over the lazy dog"; +048 * byte[] hmac = new HmacUtils(HMAC_SHA_224, key).hmac(valueToDigest); +049 * // Mac re-use +050 * HmacUtils hm1 = new HmacUtils("HmacAlgoName", key); // use a valid name here! +051 * String hexPom = hm1.hmacHex(new File("pom.xml")); +052 * String hexNot = hm1.hmacHex(new File("NOTICE.txt")); +053 * </pre> +054 * @since 1.10 +055 * @version $Id$ +056 */ +057public final class HmacUtils { +058 +059 private static final int STREAM_BUFFER_LENGTH = 1024; +060 +061 /** +062 * Returns whether this algorithm is available +063 * +064 *@param name the name to check +065 * @return whether this algorithm is available +066 * @since 1.11 +067 */ +068 public static boolean isAvailable(final String name) { +069 try { +070 Mac.getInstance(name); +071 return true; +072 } catch (final NoSuchAlgorithmException e) { +073 return false; +074 } +075 } +076 +077 /** +078 * Returns whether this algorithm is available +079 * +080 *@param name the name to check +081 * @return whether this algorithm is available +082 * @since 1.11 +083 */ +084 public static boolean isAvailable(final HmacAlgorithms name) { +085 try { +086 Mac.getInstance(name.getName()); +087 return true; +088 } catch (final NoSuchAlgorithmException e) { +089 return false; +090 } +091 } +092 +093 /** +094 * Returns an initialized <code>Mac</code> for the HmacMD5 algorithm. +095 * <p> +096 * Every implementation of the Java platform is required to support this standard Mac algorithm. +097 * </p> +098 * +099 * @param key +100 * They key for the keyed digest (must not be null) +101 * @return A Mac instance initialized with the given key. +102 * @see Mac#getInstance(String) +103 * @see Mac#init(Key) +104 * @throws IllegalArgumentException +105 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. 
+106 * @deprecated (1.11) Use {@code getInitializedMac(HmacAlgorithms.HMAC_MD5, byte[])} +107 */ +108 @Deprecated +109 public static Mac getHmacMd5(final byte[] key) { +110 return getInitializedMac(HmacAlgorithms.HMAC_MD5, key); +111 } +112 +113 /** +114 * Returns an initialized <code>Mac</code> for the HmacSHA1 algorithm. +115 * <p> +116 * Every implementation of the Java platform is required to support this standard Mac algorithm. +117 * </p> +118 * +119 * @param key +120 * They key for the keyed digest (must not be null) +121 * @return A Mac instance initialized with the given key. +122 * @see Mac#getInstance(String) +123 * @see Mac#init(Key) +124 * @throws IllegalArgumentException +125 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +126 * @deprecated (1.11) Use {@code getInitializedMac(HmacAlgorithms.HMAC_SHA_1, byte[])} +127 */ +128 @Deprecated +129 public static Mac getHmacSha1(final byte[] key) { +130 return getInitializedMac(HmacAlgorithms.HMAC_SHA_1, key); +131 } +132 +133 /** +134 * Returns an initialized <code>Mac</code> for the HmacSHA256 algorithm. +135 * <p> +136 * Every implementation of the Java platform is required to support this standard Mac algorithm. +137 * </p> +138 * +139 * @param key +140 * They key for the keyed digest (must not be null) +141 * @return A Mac instance initialized with the given key. +142 * @see Mac#getInstance(String) +143 * @see Mac#init(Key) +144 * @throws IllegalArgumentException +145 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +146 * @deprecated (1.11) Use {@code getInitializedMac(HmacAlgorithms.HMAC_SHA_256, byte[])} +147 */ +148 @Deprecated +149 public static Mac getHmacSha256(final byte[] key) { +150 return getInitializedMac(HmacAlgorithms.HMAC_SHA_256, key); +151 } +152 +153 /** +154 * Returns an initialized <code>Mac</code> for the HmacSHA384 algorithm. 
+155 * <p> +156 * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm. +157 * </p> +158 * +159 * @param key +160 * They key for the keyed digest (must not be null) +161 * @return A Mac instance initialized with the given key. +162 * @see Mac#getInstance(String) +163 * @see Mac#init(Key) +164 * @throws IllegalArgumentException +165 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +166 * @deprecated (1.11) Use {@code getInitializedMac(HmacAlgorithms.HMAC_SHA_384, byte[])} +167 */ +168 @Deprecated +169 public static Mac getHmacSha384(final byte[] key) { +170 return getInitializedMac(HmacAlgorithms.HMAC_SHA_384, key); +171 } +172 +173 /** +174 * Returns an initialized <code>Mac</code> for the HmacSHA512 algorithm. +175 * <p> +176 * Every implementation of the Java platform is <em>not</em> required to support this Mac algorithm. +177 * </p> +178 * +179 * @param key +180 * They key for the keyed digest (must not be null) +181 * @return A Mac instance initialized with the given key. +182 * @see Mac#getInstance(String) +183 * @see Mac#init(Key) +184 * @throws IllegalArgumentException +185 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +186 * @deprecated (1.11) Use {@code getInitializedMac(HmacAlgorithms.HMAC_SHA_512, byte[])} +187 */ +188 @Deprecated +189 public static Mac getHmacSha512(final byte[] key) { +190 return getInitializedMac(HmacAlgorithms.HMAC_SHA_512, key); +191 } +192 +193 /** +194 * Returns an initialized <code>Mac</code> for the given <code>algorithm</code>. +195 * +196 * @param algorithm +197 * the name of the algorithm requested. See +198 * <a href= "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" +199 * >Appendix A in the Java Cryptography Architecture Reference Guide</a> for information about standard +200 * algorithm names. 
+201 * @param key +202 * They key for the keyed digest (must not be null) +203 * @return A Mac instance initialized with the given key. +204 * @see Mac#getInstance(String) +205 * @see Mac#init(Key) +206 * @throws IllegalArgumentException +207 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +208 */ +209 public static Mac getInitializedMac(final HmacAlgorithms algorithm, final byte[] key) { +210 return getInitializedMac(algorithm.getName(), key); +211 } +212 +213 /** +214 * Returns an initialized <code>Mac</code> for the given <code>algorithm</code>. +215 * +216 * @param algorithm +217 * the name of the algorithm requested. See +218 * <a href= "http://docs.oracle.com/javase/6/docs/technotes/guides/security/crypto/CryptoSpec.html#AppA" +219 * >Appendix A in the Java Cryptography Architecture Reference Guide</a> for information about standard +220 * algorithm names. +221 * @param key +222 * They key for the keyed digest (must not be null) +223 * @return A Mac instance initialized with the given key. +224 * @see Mac#getInstance(String) +225 * @see Mac#init(Key) +226 * @throws IllegalArgumentException +227 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +228 */ +229 public static Mac getInitializedMac(final String algorithm, final byte[] key) { +230 +231 if (key == null) { +232 throw new IllegalArgumentException("Null key"); +233 } +234 +235 try { +236 final SecretKeySpec keySpec = new SecretKeySpec(key, algorithm); +237 final Mac mac = Mac.getInstance(algorithm); +238 mac.init(keySpec); +239 return mac; +240 } catch (final NoSuchAlgorithmException e) { +241 throw new IllegalArgumentException(e); +242 } catch (final InvalidKeyException e) { +243 throw new IllegalArgumentException(e); +244 } +245 } +246 +247 // hmacMd5 +248 +249 /** +250 * Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value. 
+251 * +252 * @param key +253 * They key for the keyed digest (must not be null) +254 * @param valueToDigest +255 * The value (data) which should to digest (maybe empty or null) +256 * @return HmacMD5 MAC for the given key and value +257 * @throws IllegalArgumentException +258 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +259 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(byte[])} +260 */ +261 @Deprecated +262 public static byte[] hmacMd5(final byte[] key, final byte[] valueToDigest) { +263 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmac(valueToDigest); +264 } +265 +266 /** +267 * Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value. +268 * +269 * @param key +270 * They key for the keyed digest (must not be null) +271 * @param valueToDigest +272 * The value (data) which should to digest +273 * <p> +274 * The InputStream must not be null and will not be closed +275 * </p> +276 * @return HmacMD5 MAC for the given key and value +277 * @throws IOException +278 * If an I/O error occurs. +279 * @throws IllegalArgumentException +280 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +281 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmac(InputStream)} +282 */ +283 @Deprecated +284 public static byte[] hmacMd5(final byte[] key, final InputStream valueToDigest) throws IOException { +285 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmac(valueToDigest); +286 } +287 +288 /** +289 * Returns a HmacMD5 Message Authentication Code (MAC) for the given key and value. 
+290 * +291 * @param key +292 * They key for the keyed digest (must not be null) +293 * @param valueToDigest +294 * The value (data) which should to digest (maybe empty or null) +295 * @return HmacMD5 MAC for the given key and value +296 * @throws IllegalArgumentException +297 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +298 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmac(String)} +299 */ +300 @Deprecated +301 public static byte[] hmacMd5(final String key, final String valueToDigest) { +302 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmac(valueToDigest); +303 } +304 +305 /** +306 * Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value. +307 * +308 * @param key +309 * They key for the keyed digest (must not be null) +310 * @param valueToDigest +311 * The value (data) which should to digest (maybe empty or null) +312 * @return HmacMD5 MAC for the given key and value as a hex string (lowercase) +313 * @throws IllegalArgumentException +314 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +315 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(byte[])} +316 */ +317 @Deprecated +318 public static String hmacMd5Hex(final byte[] key, final byte[] valueToDigest) { +319 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmacHex(valueToDigest); +320 } +321 +322 /** +323 * Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value. +324 * +325 * @param key +326 * They key for the keyed digest (must not be null) +327 * @param valueToDigest +328 * The value (data) which should to digest +329 * <p> +330 * The InputStream must not be null and will not be closed +331 * </p> +332 * @return HmacMD5 MAC for the given key and value as a hex string (lowercase) +333 * @throws IOException +334 * If an I/O error occurs. 
+335 * @throws IllegalArgumentException +336 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +337 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, byte[]).hmacHex(InputStream)} +338 */ +339 @Deprecated +340 public static String hmacMd5Hex(final byte[] key, final InputStream valueToDigest) throws IOException { +341 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmacHex(valueToDigest); +342 } +343 +344 /** +345 * Returns a HmacMD5 Message Authentication Code (MAC) as a hex string (lowercase) for the given key and value. +346 * +347 * @param key +348 * They key for the keyed digest (must not be null) +349 * @param valueToDigest +350 * The value (data) which should to digest (maybe empty or null) +351 * @return HmacMD5 MAC for the given key and value as a hex string (lowercase) +352 * @throws IllegalArgumentException +353 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +354 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_MD5, String).hmacHex(String)} +355 */ +356 @Deprecated +357 public static String hmacMd5Hex(final String key, final String valueToDigest) { +358 return new HmacUtils(HmacAlgorithms.HMAC_MD5, key).hmacHex(valueToDigest); +359 } +360 +361 // hmacSha1 +362 +363 /** +364 * Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value. +365 * +366 * @param key +367 * They key for the keyed digest (must not be null) +368 * @param valueToDigest +369 * The value (data) which should to digest (maybe empty or null) +370 * @return HmacSHA1 MAC for the given key and value +371 * @throws IllegalArgumentException +372 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. 
+373 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(byte[])} +374 */ +375 @Deprecated +376 public static byte[] hmacSha1(final byte[] key, final byte[] valueToDigest) { +377 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmac(valueToDigest); +378 } +379 +380 /** +381 * Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value. +382 * +383 * @param key +384 * They key for the keyed digest (must not be null) +385 * @param valueToDigest +386 * The value (data) which should to digest +387 * <p> +388 * The InputStream must not be null and will not be closed +389 * </p> +390 * @return HmacSHA1 MAC for the given key and value +391 * @throws IOException +392 * If an I/O error occurs. +393 * @throws IllegalArgumentException +394 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +395 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmac(InputStream)} +396 */ +397 @Deprecated +398 public static byte[] hmacSha1(final byte[] key, final InputStream valueToDigest) throws IOException { +399 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmac(valueToDigest); +400 } +401 +402 /** +403 * Returns a HmacSHA1 Message Authentication Code (MAC) for the given key and value. +404 * +405 * @param key +406 * They key for the keyed digest (must not be null) +407 * @param valueToDigest +408 * The value (data) which should to digest (maybe empty or null) +409 * @return HmacSHA1 MAC for the given key and value +410 * @throws IllegalArgumentException +411 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. 
+412 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmac(String)} +413 */ +414 @Deprecated +415 public static byte[] hmacSha1(final String key, final String valueToDigest) { +416 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmac(valueToDigest); +417 } +418 +419 /** +420 * Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +421 * +422 * @param key +423 * The key for the keyed digest (must not be null) +424 * @param valueToDigest +425 * The value (data) to digest (may be empty or null) +426 * @return HmacSHA1 MAC for the given key and value as hex string (lowercase) +427 * @throws IllegalArgumentException +428 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +429 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(byte[])} +430 */ +431 @Deprecated +432 public static String hmacSha1Hex(final byte[] key, final byte[] valueToDigest) { +433 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmacHex(valueToDigest); +434 } +435 +436 /** +437 * Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +438 * +439 * @param key +440 * They key for the keyed digest (must not be null) +441 * @param valueToDigest +442 * The value (data) which should to digest +443 * <p> +444 * The InputStream must not be null and will not be closed +445 * </p> +446 * @return HmacSHA1 MAC for the given key and value as hex string (lowercase) +447 * @throws IOException +448 * If an I/O error occurs. +449 * @throws IllegalArgumentException +450 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+451 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, byte[]).hmacHex(InputStream)} +452 */ +453 @Deprecated +454 public static String hmacSha1Hex(final byte[] key, final InputStream valueToDigest) throws IOException { +455 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmacHex(valueToDigest); +456 } +457 +458 /** +459 * Returns a HmacSHA1 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +460 * +461 * @param key +462 * The key for the keyed digest (must not be null) +463 * @param valueToDigest +464 * The value (data) to digest (may be empty or null) +465 * @return HmacSHA1 MAC for the given key and value as hex string (lowercase) +466 * @throws IllegalArgumentException +467 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +468 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_1, String).hmacHex(String)} +469 */ +470 @Deprecated +471 public static String hmacSha1Hex(final String key, final String valueToDigest) { +472 return new HmacUtils(HmacAlgorithms.HMAC_SHA_1, key).hmacHex(valueToDigest); +473 } +474 +475 // hmacSha256 +476 +477 /** +478 * Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value. +479 * +480 * @param key +481 * They key for the keyed digest (must not be null) +482 * @param valueToDigest +483 * The value (data) which should to digest (maybe empty or null) +484 * @return HmacSHA256 MAC for the given key and value +485 * @throws IllegalArgumentException +486 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+487 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(byte[])} +488 */ +489 @Deprecated +490 public static byte[] hmacSha256(final byte[] key, final byte[] valueToDigest) { +491 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmac(valueToDigest); +492 } +493 +494 /** +495 * Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value. +496 * +497 * @param key +498 * The key for the keyed digest (must not be null) +499 * @param valueToDigest +500 * The value (data) to digest +501 * <p> +502 * The InputStream must not be null and will not be closed +503 * </p> +504 * @return HmacSHA256 MAC for the given key and value +505 * @throws IOException +506 * If an I/O error occurs. +507 * @throws IllegalArgumentException +508 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +509 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmac(InputStream)} +510 */ +511 @Deprecated +512 public static byte[] hmacSha256(final byte[] key, final InputStream valueToDigest) throws IOException { +513 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmac(valueToDigest); +514 } +515 +516 /** +517 * Returns a HmacSHA256 Message Authentication Code (MAC) for the given key and value. +518 * +519 * @param key +520 * They key for the keyed digest (must not be null) +521 * @param valueToDigest +522 * The value (data) which should to digest (maybe empty or null) +523 * @return HmacSHA256 MAC for the given key and value +524 * @throws IllegalArgumentException +525 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+526 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmac(String)} +527 */ +528 @Deprecated +529 public static byte[] hmacSha256(final String key, final String valueToDigest) { +530 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmac(valueToDigest); +531 } +532 +533 /** +534 * Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +535 * +536 * @param key +537 * The key for the keyed digest (must not be null) +538 * @param valueToDigest +539 * The value (data) to digest (may be empty or null) +540 * @return HmacSHA256 MAC for the given key and value as hex string (lowercase) +541 * @throws IllegalArgumentException +542 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +543 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(byte[])} +544 */ +545 @Deprecated +546 public static String hmacSha256Hex(final byte[] key, final byte[] valueToDigest) { +547 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmacHex(valueToDigest); +548 } +549 +550 /** +551 * Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +552 * +553 * @param key +554 * They key for the keyed digest (must not be null) +555 * @param valueToDigest +556 * The value (data) which should to digest +557 * <p> +558 * The InputStream must not be null and will not be closed +559 * </p> +560 * @return HmacSHA256 MAC for the given key and value as hex string (lowercase) +561 * @throws IOException +562 * If an I/O error occurs. +563 * @throws IllegalArgumentException +564 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+565 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, byte[]).hmacHex(InputStream)} +566 */ +567 @Deprecated +568 public static String hmacSha256Hex(final byte[] key, final InputStream valueToDigest) throws IOException { +569 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmacHex(valueToDigest); +570 } +571 +572 /** +573 * Returns a HmacSHA256 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +574 * +575 * @param key +576 * The key for the keyed digest (must not be null) +577 * @param valueToDigest +578 * The value (data) to digest (may be empty or null) +579 * @return HmacSHA256 MAC for the given key and value as hex string (lowercase) +580 * @throws IllegalArgumentException +581 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +582 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_256, String).hmacHex(String)} +583 */ +584 @Deprecated +585 public static String hmacSha256Hex(final String key, final String valueToDigest) { +586 return new HmacUtils(HmacAlgorithms.HMAC_SHA_256, key).hmacHex(valueToDigest); +587 } +588 +589 // hmacSha384 +590 +591 /** +592 * Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value. +593 * +594 * @param key +595 * They key for the keyed digest (must not be null) +596 * @param valueToDigest +597 * The value (data) which should to digest (maybe empty or null) +598 * @return HmacSHA384 MAC for the given key and value +599 * @throws IllegalArgumentException +600 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+601 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(byte[])} +602 */ +603 @Deprecated +604 public static byte[] hmacSha384(final byte[] key, final byte[] valueToDigest) { +605 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmac(valueToDigest); +606 } +607 +608 /** +609 * Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value. +610 * +611 * @param key +612 * The key for the keyed digest (must not be null) +613 * @param valueToDigest +614 * The value (data) to digest +615 * <p> +616 * The InputStream must not be null and will not be closed +617 * </p> +618 * @return HmacSHA384 MAC for the given key and value +619 * @throws IOException +620 * If an I/O error occurs. +621 * @throws IllegalArgumentException +622 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +623 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmac(InputStream)} +624 */ +625 @Deprecated +626 public static byte[] hmacSha384(final byte[] key, final InputStream valueToDigest) throws IOException { +627 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmac(valueToDigest); +628 } +629 +630 /** +631 * Returns a HmacSHA384 Message Authentication Code (MAC) for the given key and value. +632 * +633 * @param key +634 * They key for the keyed digest (must not be null) +635 * @param valueToDigest +636 * The value (data) which should to digest (maybe empty or null) +637 * @return HmacSHA384 MAC for the given key and value +638 * @throws IllegalArgumentException +639 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+640 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmac(String)} +641 */ +642 @Deprecated +643 public static byte[] hmacSha384(final String key, final String valueToDigest) { +644 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmac(valueToDigest); +645 } +646 +647 /** +648 * Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +649 * +650 * @param key +651 * The key for the keyed digest (must not be null) +652 * @param valueToDigest +653 * The value (data) to digest (may be empty or null) +654 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase) +655 * @throws IllegalArgumentException +656 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +657 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(byte[])} +658 */ +659 @Deprecated +660 public static String hmacSha384Hex(final byte[] key, final byte[] valueToDigest) { +661 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmacHex(valueToDigest); +662 } +663 +664 /** +665 * Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +666 * +667 * @param key +668 * They key for the keyed digest (must not be null) +669 * @param valueToDigest +670 * The value (data) which should to digest +671 * <p> +672 * The InputStream must not be null and will not be closed +673 * </p> +674 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase) +675 * @throws IOException +676 * If an I/O error occurs. +677 * @throws IllegalArgumentException +678 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+679 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, byte[]).hmacHex(InputStream)} +680 */ +681 @Deprecated +682 public static String hmacSha384Hex(final byte[] key, final InputStream valueToDigest) throws IOException { +683 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmacHex(valueToDigest); +684 } +685 +686 /** +687 * Returns a HmacSHA384 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +688 * +689 * @param key +690 * The key for the keyed digest (must not be null) +691 * @param valueToDigest +692 * The value (data) to digest (may be empty or null) +693 * @return HmacSHA384 MAC for the given key and value as hex string (lowercase) +694 * @throws IllegalArgumentException +695 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +696 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_384, String).hmacHex(String)} +697 */ +698 @Deprecated +699 public static String hmacSha384Hex(final String key, final String valueToDigest) { +700 return new HmacUtils(HmacAlgorithms.HMAC_SHA_384, key).hmacHex(valueToDigest); +701 } +702 +703 // hmacSha512 +704 +705 /** +706 * Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value. +707 * +708 * @param key +709 * They key for the keyed digest (must not be null) +710 * @param valueToDigest +711 * The value (data) which should to digest (maybe empty or null) +712 * @return HmacSHA512 MAC for the given key and value +713 * @throws IllegalArgumentException +714 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+715 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(byte[])} +716 */ +717 @Deprecated +718 public static byte[] hmacSha512(final byte[] key, final byte[] valueToDigest) { +719 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmac(valueToDigest); +720 } +721 +722 /** +723 * Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value. +724 * +725 * @param key +726 * The key for the keyed digest (must not be null) +727 * @param valueToDigest +728 * The value (data) to digest +729 * <p> +730 * The InputStream must not be null and will not be closed +731 * </p> +732 * @return HmacSHA512 MAC for the given key and value +733 * @throws IOException +734 * If an I/O error occurs. +735 * @throws IllegalArgumentException +736 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +737 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmac(InputStream)} +738 */ +739 @Deprecated +740 public static byte[] hmacSha512(final byte[] key, final InputStream valueToDigest) throws IOException { +741 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmac(valueToDigest); +742 } +743 +744 /** +745 * Returns a HmacSHA512 Message Authentication Code (MAC) for the given key and value. +746 * +747 * @param key +748 * They key for the keyed digest (must not be null) +749 * @param valueToDigest +750 * The value (data) which should to digest (maybe empty or null) +751 * @return HmacSHA512 MAC for the given key and value +752 * @throws IllegalArgumentException +753 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+754 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmac(String)} +755 */ +756 @Deprecated +757 public static byte[] hmacSha512(final String key, final String valueToDigest) { +758 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmac(valueToDigest); +759 } +760 +761 /** +762 * Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +763 * +764 * @param key +765 * The key for the keyed digest (must not be null) +766 * @param valueToDigest +767 * The value (data) to digest (may be empty or null) +768 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase) +769 * @throws IllegalArgumentException +770 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +771 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(byte[])} +772 */ +773 @Deprecated +774 public static String hmacSha512Hex(final byte[] key, final byte[] valueToDigest) { +775 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmacHex(valueToDigest); +776 } +777 +778 /** +779 * Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +780 * +781 * @param key +782 * They key for the keyed digest (must not be null) +783 * @param valueToDigest +784 * The value (data) which should to digest +785 * <p> +786 * The InputStream must not be null and will not be closed +787 * </p> +788 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase) +789 * @throws IOException +790 * If an I/O error occurs. +791 * @throws IllegalArgumentException +792 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid.
+793 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, byte[]).hmacHex(InputStream)} +794 */ +795 @Deprecated +796 public static String hmacSha512Hex(final byte[] key, final InputStream valueToDigest) throws IOException { +797 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmacHex(valueToDigest); +798 } +799 +800 /** +801 * Returns a HmacSHA512 Message Authentication Code (MAC) as hex string (lowercase) for the given key and value. +802 * +803 * @param key +804 * The key for the keyed digest (must not be null) +805 * @param valueToDigest +806 * The value (data) to digest (may be empty or null) +807 * @return HmacSHA512 MAC for the given key and value as hex string (lowercase) +808 * @throws IllegalArgumentException +809 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +810 * @deprecated (1.11) Use {@code new HmacUtils(HmacAlgorithms.HMAC_SHA_512, String).hmacHex(String)} +811 */ +812 @Deprecated +813 public static String hmacSha512Hex(final String key, final String valueToDigest) { +814 return new HmacUtils(HmacAlgorithms.HMAC_SHA_512, key).hmacHex(valueToDigest); +815 } +816 +817 // update +818 +819 /** +820 * Resets and then updates the given {@link Mac} with the value. +821 * +822 * @param mac +823 * the initialized {@link Mac} to update +824 * @param valueToDigest +825 * the value to update the {@link Mac} with (may be null or empty) +826 * @return the updated {@link Mac} +827 * @throws IllegalStateException +828 * if the Mac was not initialized +829 */ +830 public static Mac updateHmac(final Mac mac, final byte[] valueToDigest) { +831 mac.reset(); +832 mac.update(valueToDigest); +833 return mac; +834 } +835 +836 /** +837 * Resets and then updates the given {@link Mac} with the value.
+838 * +839 * @param mac +840 * the initialized {@link Mac} to update +841 * @param valueToDigest +842 * the value to update the {@link Mac} with +843 * <p> +844 * The InputStream must not be null and will not be closed +845 * </p> +846 * @return the updated {@link Mac} +847 * @throws IOException +848 * If an I/O error occurs. +849 * @throws IllegalStateException +850 * If the Mac was not initialized +851 */ +852 public static Mac updateHmac(final Mac mac, final InputStream valueToDigest) throws IOException { +853 mac.reset(); +854 final byte[] buffer = new byte[STREAM_BUFFER_LENGTH]; +855 int read = valueToDigest.read(buffer, 0, STREAM_BUFFER_LENGTH); +856 +857 while (read > -1) { +858 mac.update(buffer, 0, read); +859 read = valueToDigest.read(buffer, 0, STREAM_BUFFER_LENGTH); +860 } +861 +862 return mac; +863 } +864 +865 /** +866 * Resets and then updates the given {@link Mac} with the value. +867 * +868 * @param mac +869 * the initialized {@link Mac} to update +870 * @param valueToDigest +871 * the value to update the {@link Mac} with (may be null or empty) +872 * @return the updated {@link Mac} +873 * @throws IllegalStateException +874 * if the Mac was not initialized +875 */ +876 public static Mac updateHmac(final Mac mac, final String valueToDigest) { +877 mac.reset(); +878 mac.update(StringUtils.getBytesUtf8(valueToDigest)); +879 return mac; +880 } +881 +882 /** +883 * Preserves binary compatibility only. +884 * As in previous versions, this constructor does not provide useful behaviour: the instance has no {@link Mac}. +885 * @deprecated since 1.11; only useful to preserve binary compatibility +886 */ +887 @Deprecated +888 public HmacUtils() { +889 this(null); +890 } +891 +892 private final Mac mac; +893 +894 private HmacUtils(final Mac mac) { +895 this.mac = mac; +896 } +897 +898 /** +899 * Creates an instance using the provided algorithm type.
+900 * +901 * @param algorithm to use +902 * @param key the key to use +903 * @throws IllegalArgumentException +904 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +905 * @since 1.11 +906 */ +907 public HmacUtils(final String algorithm, final byte[] key) { +908 this(getInitializedMac(algorithm, key)); +909 } +910 +911 /** +912 * Creates an instance using the provided algorithm type. +913 * +914 * @param algorithm the name of the algorithm to use +915 * @param key the key to use; converted to bytes with {@link StringUtils#getBytesUtf8(String)} +916 * @throws IllegalArgumentException +917 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +918 * @since 1.11 +919 */ +920 public HmacUtils(final String algorithm, final String key) { +921 this(algorithm, StringUtils.getBytesUtf8(key)); +922 } +923 +924 /** +925 * Creates an instance using the provided algorithm type. +926 * +927 * @param algorithm the algorithm to use; its {@link HmacAlgorithms#getName() name} selects the MAC +928 * @param key the key to use; converted to bytes with {@link StringUtils#getBytesUtf8(String)} +929 * @throws IllegalArgumentException +930 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +931 * @since 1.11 +932 */ +933 public HmacUtils(final HmacAlgorithms algorithm, final String key) { +934 this(algorithm.getName(), StringUtils.getBytesUtf8(key)); +935 } +936 +937 /** +938 * Creates an instance using the provided algorithm type. +939 * +940 * @param algorithm the algorithm to use; its {@link HmacAlgorithms#getName() name} selects the MAC +941 * @param key the key to use +942 * @throws IllegalArgumentException +943 * when a {@link NoSuchAlgorithmException} is caught or key is null or key is invalid. +944 * @since 1.11 +945 */ +946 public HmacUtils(final HmacAlgorithms algorithm, final byte[] key) { +947 this(algorithm.getName(), key); +948 } +949 +950 /** +951 * Returns the digest for the input data. +952 * +953 * @param valueToDigest the input to use +954 * @return the digest as a byte[] +955 * @since 1.11 +956 */ +957 public byte[] hmac(final byte[] valueToDigest) { +958 return mac.doFinal(valueToDigest); +959 } +960 +961 /** +962 * Returns the digest for the input data.
+963 * +964 * @param valueToDigest the input to use +965 * @return the digest as a hex String +966 * @since 1.11 +967 */ +968 public String hmacHex(final byte[] valueToDigest) { +969 return Hex.encodeHexString(hmac(valueToDigest)); +970 } +971 +972 /** +973 * Returns the digest for the input data. +974 * +975 * @param valueToDigest the input to use, treated as UTF-8 +976 * @return the digest as a byte[] +977 * @since 1.11 +978 */ +979 public byte[] hmac(final String valueToDigest) { +980 return mac.doFinal(StringUtils.getBytesUtf8(valueToDigest)); +981 } +982 +983 /** +984 * Returns the digest for the input data, hex-encoded via {@link Hex#encodeHexString(byte[])}. +985 * +986 * @param valueToDigest the input to use, treated as UTF-8 +987 * @return the digest as a hex String +988 * @since 1.11 +989 */ +990 public String hmacHex(final String valueToDigest) { +991 return Hex.encodeHexString(hmac(valueToDigest)); +992 } +993 +994 /** +995 * Returns the digest for the input data. +996 * +997 * @param valueToDigest the input to use; it is fed to the MAC with {@link Mac#update(ByteBuffer)} +998 * @return the digest as a byte[] +999 * @since 1.11 +1000 */ +1001 public byte[] hmac(final ByteBuffer valueToDigest) { +1002 mac.update(valueToDigest); +1003 return mac.doFinal(); +1004 } +1005 +1006 /** +1007 * Returns the digest for the input data, hex-encoded via {@link Hex#encodeHexString(byte[])}. +1008 * +1009 * @param valueToDigest the input to use; it is fed to the MAC with {@link Mac#update(ByteBuffer)} +1010 * @return the digest as a hex String +1011 * @since 1.11 +1012 */ +1013 public String hmacHex(final ByteBuffer valueToDigest) { +1014 return Hex.encodeHexString(hmac(valueToDigest)); +1015 } +1016 +1017 /** +1018 * Returns the digest for the stream. +1019 * +1020 * @param valueToDigest +1021 * the data to use +1022 * <p> +1023 * The InputStream must not be null and will not be closed +1024 * </p> +1025 * @return the digest +1026 * @throws IOException +1027 * If an I/O error occurs.
+1028 * @since 1.11 +1029 */ +1030 public byte[] hmac(final InputStream valueToDigest) throws IOException { +1031 final byte[] buffer = new byte[STREAM_BUFFER_LENGTH]; +1032 int read; +1033 +1034 while ((read = valueToDigest.read(buffer, 0, STREAM_BUFFER_LENGTH) ) > -1) { +1035 mac.update(buffer, 0, read); +1036 } +1037 return mac.doFinal(); +1038 } +1039 +1040 /** +1041 * Returns the digest for the stream. +1042 * +1043 * @param valueToDigest +1044 * the data to use +1045 * <p> +1046 * The InputStream must not be null and will not be closed +1047 * </p> +1048 * @return the digest as a hex String +1049 * @throws IOException +1050 * If an I/O error occurs. +1051 * @since 1.11 +1052 */ +1053 public String hmacHex(final InputStream valueToDigest) throws IOException { +1054 return Hex.encodeHexString(hmac(valueToDigest)); +1055 } +1056 +1057 /** +1058 * Returns the digest for the file. +1059 * +1060 * @param valueToDigest the file to use +1061 * @return the digest +1062 * @throws IOException +1063 * If an I/O error occurs. +1064 * @since 1.11 +1065 */ +1066 public byte[] hmac(final File valueToDigest) throws IOException { +1067 try (final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(valueToDigest))) { +1068 return hmac(stream); +1069 } +1070 } +1071 +1072 /** +1073 * Returns the digest for the file. +1074 * +1075 * @param valueToDigest the file to use +1076 * @return the digest as a hex String +1077 * @throws IOException +1078 * If an I/O error occurs. +1079 * @since 1.11 +1080 */ +1081 public String hmacHex(final File valueToDigest) throws IOException { +1082 return Hex.encodeHexString(hmac(valueToDigest)); +1083 } +1084 +1085} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.digest; +018 +019import java.security.MessageDigest; +020import java.security.SecureRandom; +021import java.util.Arrays; +022import java.util.Random; +023import java.util.concurrent.ThreadLocalRandom; +024import java.util.regex.Matcher; +025import java.util.regex.Pattern; +026 +027import org.apache.commons.codec.Charsets; +028 +029/** +030 * The libc crypt() "$1$" and Apache "$apr1$" MD5-based hash algorithm. +031 * <p> +032 * Based on the public domain ("beer-ware") C implementation from Poul-Henning Kamp which was found at: <a +033 * href="http://www.freebsd.org/cgi/cvsweb.cgi/src/lib/libcrypt/crypt-md5.c?rev=1.1;content-type=text%2Fplain"> +034 * crypt-md5.c @ freebsd.org</a><br> +035 * <p> +036 * Source: +037 * +038 * <pre> +039 * $FreeBSD: src/lib/libcrypt/crypt-md5.c,v 1.1 1999/01/21 13:50:09 brandon Exp $ +040 * </pre> +041 * <p> +042 * Conversion to Kotlin and from there to Java in 2012. +043 * <p> +044 * The C style comments are from the original C code, the ones with "//" from the port. +045 * <p> +046 * This class is immutable and thread-safe. 
+047 * +048 * @version $Id$ +049 * @since 1.7 +050 */ +051public class Md5Crypt { +052 +053 /** The Identifier of the Apache variant. */ +054 static final String APR1_PREFIX = "$apr1$"; +055 +056 /** The number of bytes of the final hash. */ +057 private static final int BLOCKSIZE = 16; +058 +059 /** The Identifier of this crypt() variant. */ +060 static final String MD5_PREFIX = "$1$"; +061 +062 /** The number of rounds of the big loop. */ +063 private static final int ROUNDS = 1000; +064 +065 /** +066 * See {@link #apr1Crypt(byte[], String)} for details. +067 * <p> +068 * A salt is generated for you using {@link SecureRandom}; to supply your own {@link Random}, use +069 * {@link #apr1Crypt(byte[], Random)}. +070 * </p> +071 * +072 * @param keyBytes plaintext string to hash. +073 * @return the hash value +074 * @throws IllegalArgumentException when a {@link java.security.NoSuchAlgorithmException} is caught. +075 * @see #apr1Crypt(byte[], String) +076 */ +077 public static String apr1Crypt(final byte[] keyBytes) { +078 return apr1Crypt(keyBytes, APR1_PREFIX + B64.getRandomSalt(8)); +079 } +080 +081 /** +082 * See {@link #apr1Crypt(byte[], String)} for details. +083 * <p> +084 * A salt is generated for you using the user provided {@link Random}. +085 * </p> +086 * +087 * @param keyBytes plaintext string to hash. +088 * @param random the instance of {@link Random} to use for generating the salt. Consider using {@link SecureRandom} +089 * or {@link ThreadLocalRandom}. +090 * @return the hash value +091 * @throws IllegalArgumentException when a {@link java.security.NoSuchAlgorithmException} is caught. +092 * @see #apr1Crypt(byte[], String) +093 */ +094 public static String apr1Crypt(final byte[] keyBytes, final Random random) { +095 return apr1Crypt(keyBytes, APR1_PREFIX + B64.getRandomSalt(8, random)); +096 } +097 +098 /** +099 * See {@link #apr1Crypt(String, String)} for details.
+100 * <p> +101 * A salt is generated for you using {@link SecureRandom} +102 * </p> +103 * +104 * @param keyBytes +105 * plaintext string to hash. +106 * @param salt +107 * An APR1 salt. The salt may be null, in which case a salt is generated for you using +108 * {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to generate your +109 * own salts. +110 * @return the hash value +111 * @throws IllegalArgumentException +112 * if the salt does not match the allowed pattern +113 * @throws IllegalArgumentException +114 * when a {@link java.security.NoSuchAlgorithmException} is caught. +115 */ +116 public static String apr1Crypt(final byte[] keyBytes, String salt) { +117 // to make the md5Crypt regex happy +118 if (salt != null && !salt.startsWith(APR1_PREFIX)) { +119 salt = APR1_PREFIX + salt; +120 } +121 return Md5Crypt.md5Crypt(keyBytes, salt, APR1_PREFIX); +122 } +123 +124 /** +125 * See {@link #apr1Crypt(String, String)} for details. +126 * <p> +127 * The key is encoded as UTF-8 and passed to {@link #apr1Crypt(byte[])}, which generates the salt; to supply your +128 * own salt, call {@link #apr1Crypt(byte[], String)} or {@link #apr1Crypt(String, String)} instead. +129 * </p> +130 * +131 * @param keyBytes +132 * plaintext string to hash (must not be null). +133 * @return the hash value +134 * @throws IllegalArgumentException +135 * when a {@link java.security.NoSuchAlgorithmException} is caught. +136 * @see #apr1Crypt(byte[], String) +137 */ +138 public static String apr1Crypt(final String keyBytes) { +139 return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8)); +140 } +141 +142 /** +143 * Generates an Apache htpasswd compatible "$apr1$" MD5 based hash value. +144 * <p> +145 * The algorithm is identical to the crypt(3) "$1$" one but produces different outputs due to the different salt +146 * prefix. +147 * +148 * @param keyBytes +149 * plaintext string to hash. +150 * @param salt +151 * salt string including the prefix and optionally garbage at the end.
The salt may be null, in which +152 * case a salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +153 * {@link SecureRandom} to generate your own salts. +154 * @return the hash value +155 * @throws IllegalArgumentException +156 * if the salt does not match the allowed pattern +157 * @throws IllegalArgumentException +158 * when a {@link java.security.NoSuchAlgorithmException} is caught. +159 */ +160 public static String apr1Crypt(final String keyBytes, final String salt) { +161 return apr1Crypt(keyBytes.getBytes(Charsets.UTF_8), salt); +162 } +163 +164 /** +165 * Generates a libc6 crypt() compatible "$1$" hash value. +166 * <p> +167 * See {@link #md5Crypt(byte[], String)} for details. +168 *</p> +169 * <p> +170 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +171 * {@link SecureRandom} to generate your own salts and calling {@link #md5Crypt(byte[], String)}. +172 * </p> +173 * @param keyBytes +174 * plaintext string to hash. +175 * @return the hash value +176 * @throws IllegalArgumentException +177 * when a {@link java.security.NoSuchAlgorithmException} is caught. +178 * @see #md5Crypt(byte[], String) +179 */ +180 public static String md5Crypt(final byte[] keyBytes) { +181 return md5Crypt(keyBytes, MD5_PREFIX + B64.getRandomSalt(8)); +182 } +183 +184 /** +185 * Generates a libc6 crypt() compatible "$1$" hash value. +186 * <p> +187 * See {@link #md5Crypt(byte[], String)} for details. +188 *</p> +189 * <p> +190 * A salt is generated for you using the instance of {@link Random} you supply. +191 * </p> +192 * @param keyBytes +193 * plaintext string to hash. +194 * @param random +195 * the instance of {@link Random} to use for generating the salt. Consider using {@link SecureRandom} +196 * or {@link ThreadLocalRandom}. +197 * @return the hash value +198 * @throws IllegalArgumentException +199 * when a {@link java.security.NoSuchAlgorithmException} is caught. 
+200 * @see #md5Crypt(byte[], String) +201 */ +202 public static String md5Crypt(final byte[] keyBytes, final Random random) { +203 return md5Crypt(keyBytes, MD5_PREFIX + B64.getRandomSalt(8, random)); +204 } +205 +206 /** +207 * Generates a libc crypt() compatible "$1$" MD5 based hash value. +208 * <p> +209 * See {@link Crypt#crypt(String, String)} for details. We use {@link SecureRandom} for seed generation by +210 * default. +211 * </p> +212 * +213 * @param keyBytes +214 * plaintext string to hash. +215 * @param salt +216 * salt string including the prefix and optionally garbage at the end. The salt may be null, in which +217 * case a salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +218 * {@link SecureRandom} to generate your own salts. +219 * @return the hash value +220 * @throws IllegalArgumentException +221 * if the salt does not match the allowed pattern +222 * @throws IllegalArgumentException +223 * when a {@link java.security.NoSuchAlgorithmException} is caught. +224 */ +225 public static String md5Crypt(final byte[] keyBytes, final String salt) { +226 return md5Crypt(keyBytes, salt, MD5_PREFIX); +227 } +228 +229 /** +230 * Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value. +231 * <p> +232 * See {@link Crypt#crypt(String, String)} or {@link #apr1Crypt(String, String)} for details. We use +233 * {@link SecureRandom by default}. +234 * </p> +235 * +236 * @param keyBytes +237 * plaintext string to hash. +238 * @param salt +239 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +240 * you using {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to +241 * generate your own salts. 
+242 * @param prefix +243 * salt prefix +244 * @return the hash value +245 * @throws IllegalArgumentException +246 * if the salt does not match the allowed pattern +247 * @throws IllegalArgumentException +248 * when a {@link java.security.NoSuchAlgorithmException} is caught. +249 */ +250 public static String md5Crypt(final byte[] keyBytes, final String salt, final String prefix) { +251 return md5Crypt(keyBytes, salt, prefix, new SecureRandom()); +252 } +253 +254 /** +255 * Generates a libc6 crypt() "$1$" or Apache htpasswd "$apr1$" hash value. +256 * <p> +257 * See {@link Crypt#crypt(String, String)} or {@link #apr1Crypt(String, String)} for details. +258 * </p> +259 * +260 * @param keyBytes +261 * plaintext string to hash. +262 * @param salt +263 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +264 * you using {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to +265 * generate your own salts. +266 * @param prefix +267 * salt prefix +268 * @param random +269 * the instance of {@link Random} to use for generating the salt. Consider using {@link SecureRandom} +270 * or {@link ThreadLocalRandom}. +271 * @return the hash value +272 * @throws IllegalArgumentException +273 * if the salt does not match the allowed pattern +274 * @throws IllegalArgumentException +275 * when a {@link java.security.NoSuchAlgorithmException} is caught. +276 */ +277 public static String md5Crypt(final byte[] keyBytes, final String salt, final String prefix, final Random random) { +278 final int keyLen = keyBytes.length; +279 +280 // Extract the real salt from the given string which can be a complete hash string. 
+281 String saltString; +282 if (salt == null) { +283 saltString = B64.getRandomSalt(8, random); +284 } else { +285 final Pattern p = Pattern.compile("^" + prefix.replace("$", "\\$") + "([\\.\\/a-zA-Z0-9]{1,8}).*"); +286 final Matcher m = p.matcher(salt); +287 if (!m.find()) { +288 throw new IllegalArgumentException("Invalid salt value: " + salt); +289 } +290 saltString = m.group(1); +291 } +292 final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8); +293 +294 final MessageDigest ctx = DigestUtils.getMd5Digest(); +295 +296 /* +297 * The password first, since that is what is most unknown +298 */ +299 ctx.update(keyBytes); +300 +301 /* +302 * Then our magic string +303 */ +304 ctx.update(prefix.getBytes(Charsets.UTF_8)); +305 +306 /* +307 * Then the raw salt +308 */ +309 ctx.update(saltBytes); +310 +311 /* +312 * Then just as many characters of the MD5(pw,salt,pw) +313 */ +314 MessageDigest ctx1 = DigestUtils.getMd5Digest(); +315 ctx1.update(keyBytes); +316 ctx1.update(saltBytes); +317 ctx1.update(keyBytes); +318 byte[] finalb = ctx1.digest(); +319 int ii = keyLen; +320 while (ii > 0) { +321 ctx.update(finalb, 0, ii > 16 ? 16 : ii); +322 ii -= 16; +323 } +324 +325 /* +326 * Don't leave anything around in vm they could use. +327 */ +328 Arrays.fill(finalb, (byte) 0); +329 +330 /* +331 * Then something really weird... +332 */ +333 ii = keyLen; +334 final int j = 0; +335 while (ii > 0) { +336 if ((ii & 1) == 1) { +337 ctx.update(finalb[j]); +338 } else { +339 ctx.update(keyBytes[j]); +340 } +341 ii >>= 1; +342 } +343 +344 /* +345 * Now make the output string +346 */ +347 final StringBuilder passwd = new StringBuilder(prefix + saltString + "$"); +348 finalb = ctx.digest(); +349 +350 /* +351 * and now, just to make sure things don't run too fast On a 60 Mhz Pentium this takes 34 msec, so you would +352 * need 30 seconds to build a 1000 entry dictionary... 
+353 */ +354 for (int i = 0; i < ROUNDS; i++) { +355 ctx1 = DigestUtils.getMd5Digest(); +356 if ((i & 1) != 0) { +357 ctx1.update(keyBytes); +358 } else { +359 ctx1.update(finalb, 0, BLOCKSIZE); +360 } +361 +362 if (i % 3 != 0) { +363 ctx1.update(saltBytes); +364 } +365 +366 if (i % 7 != 0) { +367 ctx1.update(keyBytes); +368 } +369 +370 if ((i & 1) != 0) { +371 ctx1.update(finalb, 0, BLOCKSIZE); +372 } else { +373 ctx1.update(keyBytes); +374 } +375 finalb = ctx1.digest(); +376 } +377 +378 // The following was nearly identical to the Sha2Crypt code. +379 // Again, the buflen is not really needed. +380 // int buflen = MD5_PREFIX.length() - 1 + salt_string.length() + 1 + BLOCKSIZE + 1; +381 B64.b64from24bit(finalb[0], finalb[6], finalb[12], 4, passwd); +382 B64.b64from24bit(finalb[1], finalb[7], finalb[13], 4, passwd); +383 B64.b64from24bit(finalb[2], finalb[8], finalb[14], 4, passwd); +384 B64.b64from24bit(finalb[3], finalb[9], finalb[15], 4, passwd); +385 B64.b64from24bit(finalb[4], finalb[10], finalb[5], 4, passwd); +386 B64.b64from24bit((byte) 0, (byte) 0, finalb[11], 2, passwd); +387 +388 /* +389 * Don't leave anything around in vm they could use. +390 */ +391 // Is there a better way to do this with the JVM? +392 ctx.reset(); +393 ctx1.reset(); +394 Arrays.fill(keyBytes, (byte) 0); +395 Arrays.fill(saltBytes, (byte) 0); +396 Arrays.fill(finalb, (byte) 0); +397 +398 return passwd.toString(); +399 } +400} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.digest; +019 +020import java.security.MessageDigest; +021 +022/** +023 * Standard {@link MessageDigest} algorithm names from the <cite>Java Cryptography Architecture Standard Algorithm Name +024 * Documentation</cite>. +025 * <p> +026 * This class is immutable and thread-safe. +027 * </p> +028 * <p> +029 * Java 8 and up: SHA-224. +030 * </p> +031 * <p> +032 * Java 9 and up: SHA3-224, SHA3-256, SHA3-384, SHA3-512. 
+033 * </p> +034 * +035 * @see <a href="http://docs.oracle.com/javase/6/docs/technotes/guides/security/StandardNames.html#MessageDigest"> +036 * Java 6 Cryptography Architecture Standard Algorithm Name Documentation</a> +037 * @see <a href="http://docs.oracle.com/javase/7/docs/technotes/guides/security/StandardNames.html#MessageDigest"> +038 * Java 7 Cryptography Architecture Standard Algorithm Name Documentation</a> +039 * @see <a href="http://docs.oracle.com/javase/8/docs/technotes/guides/security/StandardNames.html#MessageDigest"> +040 * Java 8 Cryptography Architecture Standard Algorithm Name Documentation</a> +041 * @see <a href="http://download.java.net/java/jdk9/docs/technotes/guides/security/StandardNames.html#MessageDigest"> +042 * Java 9 Cryptography Architecture Standard Algorithm Name Documentation</a> +043 * +044 * @see <a href="http://dx.doi.org/10.6028/NIST.FIPS.180-4">FIPS PUB 180-4</a> +045 * @see <a href="http://dx.doi.org/10.6028/NIST.FIPS.202">FIPS PUB 202</a> +046 * @since 1.7 +047 * @version $Id$ +048 */ +049public class MessageDigestAlgorithms { +050 +051 /** +052 * The MD2 message digest algorithm defined in RFC 1319. +053 */ +054 public static final String MD2 = "MD2"; +055 +056 /** +057 * The MD5 message digest algorithm defined in RFC 1321. +058 */ +059 public static final String MD5 = "MD5"; +060 +061 /** +062 * The SHA-1 hash algorithm defined in the FIPS PUB 180-2. +063 */ +064 public static final String SHA_1 = "SHA-1"; +065 +066 /** +067 * The SHA-224 hash algorithm defined in the FIPS PUB 180-3. +068 * <p> +069 * Present in Oracle Java 8. +070 * </p> +071 * +072 * @since 1.11 +073 */ +074 public static final String SHA_224 = "SHA-224"; +075 +076 /** +077 * The SHA-256 hash algorithm defined in the FIPS PUB 180-2. +078 */ +079 public static final String SHA_256 = "SHA-256"; +080 +081 /** +082 * The SHA-384 hash algorithm defined in the FIPS PUB 180-2. 
+083 */ +084 public static final String SHA_384 = "SHA-384"; +085 +086 /** +087 * The SHA-512 hash algorithm defined in the FIPS PUB 180-2. +088 */ +089 public static final String SHA_512 = "SHA-512"; +090 +091 /** +092 * The SHA3-224 hash algorithm defined in the FIPS PUB 202. +093 * <p> +094 * Included starting in Oracle Java 9 GA. +095 * </p> +096 * +097 * @since 1.11 +098 */ +099 public static final String SHA3_224 = "SHA3-224"; +100 +101 /** +102 * The SHA3-256 hash algorithm defined in the FIPS PUB 202. +103 * <p> +104 * Included starting in Oracle Java 9 GA. +105 * </p> +106 * +107 * @since 1.11 +108 */ +109 public static final String SHA3_256 = "SHA3-256"; +110 +111 /** +112 * The SHA3-384 hash algorithm defined in the FIPS PUB 202. +113 * <p> +114 * Included starting in Oracle Java 9 GA. +115 * </p> +116 * +117 * @since 1.11 +118 */ +119 public static final String SHA3_384 = "SHA3-384"; +120 +121 /** +122 * The SHA3-512 hash algorithm defined in the FIPS PUB 202. +123 * <p> +124 * Included starting in Oracle Java 9 GA. +125 * </p> +126 * +127 * @since 1.11 +128 */ +129 public static final String SHA3_512 = "SHA3-512"; +130 +131 /** +132 * Gets all constant values defined in this class. +133 * +134 * @return all constant values defined in this class. +135 * @since 1.11 +136 */ +137 public static String[] values() { +138 // N.B. do not use a constant array here as that can be changed externally by accident or design +139 return new String[] { +140 MD2, MD5, SHA_1, SHA_224, SHA_256, SHA_384, SHA_512, SHA3_224, SHA3_256, SHA3_384, SHA3_512 +141 }; +142 } +143 +144 private MessageDigestAlgorithms() { +145 // cannot be instantiated. +146 } +147 +148} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.digest; +018 +019import java.util.zip.Checksum; +020 +021/** +022 * A pure-java implementation of the CRC32 checksum that uses +023 * the same polynomial as the built-in native CRC32. +024 * +025 * This is to avoid the JNI overhead for certain uses of Checksumming +026 * where many small pieces of data are checksummed in succession. +027 * +028 * The current version is ~10x to 1.8x as fast as Sun's native +029 * java.util.zip.CRC32 in Java 1.6 +030 * +031 * Copied from Hadoop 2.6.3. +032 * The code agrees with the following file in the 2.6.3 tag: +033 * https://git-wip-us.apache.org/repos/asf?p=hadoop.git;a=blob_plain; +034 * f=hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32.java; +035 * hb=2120de588b92b9f22b1cc4188761d6a8c61aa778 +036 * <p> +037 * This class is Not ThreadSafe +038 * +039 * @see java.util.zip.CRC32 +040 * @since 1.11 +041 */ +042public class PureJavaCrc32 implements Checksum { +043 +044 /** the current CRC value, bit-flipped */ +045 private int crc; +046 +047 /** Create a new PureJavaCrc32 object. 
*/ +048 public PureJavaCrc32() { +049 _reset(); +050 } +051 +052 @Override +053 public long getValue() { +054 return (~crc) & 0xffffffffL; +055 } +056 +057 @Override +058 public void reset() { +059 _reset(); +060 } +061 +062 // called by ctor, so must not be overrideable +063 private void _reset() { +064 crc = 0xffffffff; +065 } +066 +067 @Override +068 public void update(final byte[] b, final int offset, final int len) { +069 int localCrc = crc; +070 +071 final int remainder = len & 0x7; +072 int i = offset; +073 for(final int end = offset + len - remainder; i < end; i += 8) { +074 final int x = localCrc ^ +075 ((((b[i ] << 24) >>> 24) + ((b[i+1] << 24) >>> 16)) + +076 (((b[i+2] << 24) >>> 8 ) + (b[i+3] << 24))); +077 +078 localCrc = ((T[((x << 24) >>> 24) + 0x700] ^ T[((x << 16) >>> 24) + 0x600]) ^ +079 (T[((x << 8) >>> 24) + 0x500] ^ T[ (x >>> 24) + 0x400])) ^ +080 ((T[((b[i+4] << 24) >>> 24) + 0x300] ^ T[((b[i+5] << 24) >>> 24) + 0x200]) ^ +081 (T[((b[i+6] << 24) >>> 24) + 0x100] ^ T[((b[i+7] << 24) >>> 24)])); +082 } +083 +084 /* loop unroll - duff's device style */ +085 switch(remainder) { +086 case 7: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +087 case 6: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +088 case 5: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +089 case 4: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +090 case 3: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +091 case 2: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +092 case 1: localCrc = (localCrc >>> 8) ^ T[((localCrc ^ b[i++]) << 24) >>> 24]; +093 default: +094 /* nothing */ +095 } +096 +097 // Publish crc out to object +098 crc = localCrc; +099 } +100 +101 @Override +102 final public void update(final int b) { +103 crc = (crc >>> 8) ^ T[(((crc ^ b) << 24) >>> 24)]; +104 } +105 +106 /* +107 * CRC-32 lookup tables generated by the polynomial 
0xEDB88320. +108 * See also TestPureJavaCrc32.Table. +109 */ +110 private static final int[] T = new int[] { +111 /* T8_0 */ +112 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, +113 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, +114 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, +115 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, +116 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, +117 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, +118 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, +119 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, +120 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, +121 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, +122 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, +123 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, +124 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, +125 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, +126 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, +127 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, +128 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, +129 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, +130 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, +131 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, +132 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, +133 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, +134 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, +135 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, +136 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, +137 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, +138 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0, +139 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, +140 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, +141 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, +142 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, +143 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, +144 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, +145 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, +146 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, +147 0xE40ECF0B, 
0x9309FF9D, 0x0A00AE27, 0x7D079EB1, +148 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, +149 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, +150 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, +151 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, +152 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, +153 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, +154 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, +155 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, +156 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, +157 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, +158 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, +159 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, +160 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, +161 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, +162 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, +163 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, +164 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, +165 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, +166 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, +167 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, +168 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, +169 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, +170 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, +171 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, +172 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, +173 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, +174 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, +175 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D, +176 /* T8_1 */ +177 0x00000000, 0x191B3141, 0x32366282, 0x2B2D53C3, +178 0x646CC504, 0x7D77F445, 0x565AA786, 0x4F4196C7, +179 0xC8D98A08, 0xD1C2BB49, 0xFAEFE88A, 0xE3F4D9CB, +180 0xACB54F0C, 0xB5AE7E4D, 0x9E832D8E, 0x87981CCF, +181 0x4AC21251, 0x53D92310, 0x78F470D3, 0x61EF4192, +182 0x2EAED755, 0x37B5E614, 0x1C98B5D7, 0x05838496, +183 0x821B9859, 0x9B00A918, 0xB02DFADB, 0xA936CB9A, +184 0xE6775D5D, 0xFF6C6C1C, 0xD4413FDF, 0xCD5A0E9E, +185 0x958424A2, 0x8C9F15E3, 
0xA7B24620, 0xBEA97761, +186 0xF1E8E1A6, 0xE8F3D0E7, 0xC3DE8324, 0xDAC5B265, +187 0x5D5DAEAA, 0x44469FEB, 0x6F6BCC28, 0x7670FD69, +188 0x39316BAE, 0x202A5AEF, 0x0B07092C, 0x121C386D, +189 0xDF4636F3, 0xC65D07B2, 0xED705471, 0xF46B6530, +190 0xBB2AF3F7, 0xA231C2B6, 0x891C9175, 0x9007A034, +191 0x179FBCFB, 0x0E848DBA, 0x25A9DE79, 0x3CB2EF38, +192 0x73F379FF, 0x6AE848BE, 0x41C51B7D, 0x58DE2A3C, +193 0xF0794F05, 0xE9627E44, 0xC24F2D87, 0xDB541CC6, +194 0x94158A01, 0x8D0EBB40, 0xA623E883, 0xBF38D9C2, +195 0x38A0C50D, 0x21BBF44C, 0x0A96A78F, 0x138D96CE, +196 0x5CCC0009, 0x45D73148, 0x6EFA628B, 0x77E153CA, +197 0xBABB5D54, 0xA3A06C15, 0x888D3FD6, 0x91960E97, +198 0xDED79850, 0xC7CCA911, 0xECE1FAD2, 0xF5FACB93, +199 0x7262D75C, 0x6B79E61D, 0x4054B5DE, 0x594F849F, +200 0x160E1258, 0x0F152319, 0x243870DA, 0x3D23419B, +201 0x65FD6BA7, 0x7CE65AE6, 0x57CB0925, 0x4ED03864, +202 0x0191AEA3, 0x188A9FE2, 0x33A7CC21, 0x2ABCFD60, +203 0xAD24E1AF, 0xB43FD0EE, 0x9F12832D, 0x8609B26C, +204 0xC94824AB, 0xD05315EA, 0xFB7E4629, 0xE2657768, +205 0x2F3F79F6, 0x362448B7, 0x1D091B74, 0x04122A35, +206 0x4B53BCF2, 0x52488DB3, 0x7965DE70, 0x607EEF31, +207 0xE7E6F3FE, 0xFEFDC2BF, 0xD5D0917C, 0xCCCBA03D, +208 0x838A36FA, 0x9A9107BB, 0xB1BC5478, 0xA8A76539, +209 0x3B83984B, 0x2298A90A, 0x09B5FAC9, 0x10AECB88, +210 0x5FEF5D4F, 0x46F46C0E, 0x6DD93FCD, 0x74C20E8C, +211 0xF35A1243, 0xEA412302, 0xC16C70C1, 0xD8774180, +212 0x9736D747, 0x8E2DE606, 0xA500B5C5, 0xBC1B8484, +213 0x71418A1A, 0x685ABB5B, 0x4377E898, 0x5A6CD9D9, +214 0x152D4F1E, 0x0C367E5F, 0x271B2D9C, 0x3E001CDD, +215 0xB9980012, 0xA0833153, 0x8BAE6290, 0x92B553D1, +216 0xDDF4C516, 0xC4EFF457, 0xEFC2A794, 0xF6D996D5, +217 0xAE07BCE9, 0xB71C8DA8, 0x9C31DE6B, 0x852AEF2A, +218 0xCA6B79ED, 0xD37048AC, 0xF85D1B6F, 0xE1462A2E, +219 0x66DE36E1, 0x7FC507A0, 0x54E85463, 0x4DF36522, +220 0x02B2F3E5, 0x1BA9C2A4, 0x30849167, 0x299FA026, +221 0xE4C5AEB8, 0xFDDE9FF9, 0xD6F3CC3A, 0xCFE8FD7B, +222 0x80A96BBC, 0x99B25AFD, 0xB29F093E, 0xAB84387F, +223 
0x2C1C24B0, 0x350715F1, 0x1E2A4632, 0x07317773, +224 0x4870E1B4, 0x516BD0F5, 0x7A468336, 0x635DB277, +225 0xCBFAD74E, 0xD2E1E60F, 0xF9CCB5CC, 0xE0D7848D, +226 0xAF96124A, 0xB68D230B, 0x9DA070C8, 0x84BB4189, +227 0x03235D46, 0x1A386C07, 0x31153FC4, 0x280E0E85, +228 0x674F9842, 0x7E54A903, 0x5579FAC0, 0x4C62CB81, +229 0x8138C51F, 0x9823F45E, 0xB30EA79D, 0xAA1596DC, +230 0xE554001B, 0xFC4F315A, 0xD7626299, 0xCE7953D8, +231 0x49E14F17, 0x50FA7E56, 0x7BD72D95, 0x62CC1CD4, +232 0x2D8D8A13, 0x3496BB52, 0x1FBBE891, 0x06A0D9D0, +233 0x5E7EF3EC, 0x4765C2AD, 0x6C48916E, 0x7553A02F, +234 0x3A1236E8, 0x230907A9, 0x0824546A, 0x113F652B, +235 0x96A779E4, 0x8FBC48A5, 0xA4911B66, 0xBD8A2A27, +236 0xF2CBBCE0, 0xEBD08DA1, 0xC0FDDE62, 0xD9E6EF23, +237 0x14BCE1BD, 0x0DA7D0FC, 0x268A833F, 0x3F91B27E, +238 0x70D024B9, 0x69CB15F8, 0x42E6463B, 0x5BFD777A, +239 0xDC656BB5, 0xC57E5AF4, 0xEE530937, 0xF7483876, +240 0xB809AEB1, 0xA1129FF0, 0x8A3FCC33, 0x9324FD72, +241 /* T8_2 */ +242 0x00000000, 0x01C26A37, 0x0384D46E, 0x0246BE59, +243 0x0709A8DC, 0x06CBC2EB, 0x048D7CB2, 0x054F1685, +244 0x0E1351B8, 0x0FD13B8F, 0x0D9785D6, 0x0C55EFE1, +245 0x091AF964, 0x08D89353, 0x0A9E2D0A, 0x0B5C473D, +246 0x1C26A370, 0x1DE4C947, 0x1FA2771E, 0x1E601D29, +247 0x1B2F0BAC, 0x1AED619B, 0x18ABDFC2, 0x1969B5F5, +248 0x1235F2C8, 0x13F798FF, 0x11B126A6, 0x10734C91, +249 0x153C5A14, 0x14FE3023, 0x16B88E7A, 0x177AE44D, +250 0x384D46E0, 0x398F2CD7, 0x3BC9928E, 0x3A0BF8B9, +251 0x3F44EE3C, 0x3E86840B, 0x3CC03A52, 0x3D025065, +252 0x365E1758, 0x379C7D6F, 0x35DAC336, 0x3418A901, +253 0x3157BF84, 0x3095D5B3, 0x32D36BEA, 0x331101DD, +254 0x246BE590, 0x25A98FA7, 0x27EF31FE, 0x262D5BC9, +255 0x23624D4C, 0x22A0277B, 0x20E69922, 0x2124F315, +256 0x2A78B428, 0x2BBADE1F, 0x29FC6046, 0x283E0A71, +257 0x2D711CF4, 0x2CB376C3, 0x2EF5C89A, 0x2F37A2AD, +258 0x709A8DC0, 0x7158E7F7, 0x731E59AE, 0x72DC3399, +259 0x7793251C, 0x76514F2B, 0x7417F172, 0x75D59B45, +260 0x7E89DC78, 0x7F4BB64F, 0x7D0D0816, 0x7CCF6221, +261 0x798074A4, 
0x78421E93, 0x7A04A0CA, 0x7BC6CAFD, +262 0x6CBC2EB0, 0x6D7E4487, 0x6F38FADE, 0x6EFA90E9, +263 0x6BB5866C, 0x6A77EC5B, 0x68315202, 0x69F33835, +264 0x62AF7F08, 0x636D153F, 0x612BAB66, 0x60E9C151, +265 0x65A6D7D4, 0x6464BDE3, 0x662203BA, 0x67E0698D, +266 0x48D7CB20, 0x4915A117, 0x4B531F4E, 0x4A917579, +267 0x4FDE63FC, 0x4E1C09CB, 0x4C5AB792, 0x4D98DDA5, +268 0x46C49A98, 0x4706F0AF, 0x45404EF6, 0x448224C1, +269 0x41CD3244, 0x400F5873, 0x4249E62A, 0x438B8C1D, +270 0x54F16850, 0x55330267, 0x5775BC3E, 0x56B7D609, +271 0x53F8C08C, 0x523AAABB, 0x507C14E2, 0x51BE7ED5, +272 0x5AE239E8, 0x5B2053DF, 0x5966ED86, 0x58A487B1, +273 0x5DEB9134, 0x5C29FB03, 0x5E6F455A, 0x5FAD2F6D, +274 0xE1351B80, 0xE0F771B7, 0xE2B1CFEE, 0xE373A5D9, +275 0xE63CB35C, 0xE7FED96B, 0xE5B86732, 0xE47A0D05, +276 0xEF264A38, 0xEEE4200F, 0xECA29E56, 0xED60F461, +277 0xE82FE2E4, 0xE9ED88D3, 0xEBAB368A, 0xEA695CBD, +278 0xFD13B8F0, 0xFCD1D2C7, 0xFE976C9E, 0xFF5506A9, +279 0xFA1A102C, 0xFBD87A1B, 0xF99EC442, 0xF85CAE75, +280 0xF300E948, 0xF2C2837F, 0xF0843D26, 0xF1465711, +281 0xF4094194, 0xF5CB2BA3, 0xF78D95FA, 0xF64FFFCD, +282 0xD9785D60, 0xD8BA3757, 0xDAFC890E, 0xDB3EE339, +283 0xDE71F5BC, 0xDFB39F8B, 0xDDF521D2, 0xDC374BE5, +284 0xD76B0CD8, 0xD6A966EF, 0xD4EFD8B6, 0xD52DB281, +285 0xD062A404, 0xD1A0CE33, 0xD3E6706A, 0xD2241A5D, +286 0xC55EFE10, 0xC49C9427, 0xC6DA2A7E, 0xC7184049, +287 0xC25756CC, 0xC3953CFB, 0xC1D382A2, 0xC011E895, +288 0xCB4DAFA8, 0xCA8FC59F, 0xC8C97BC6, 0xC90B11F1, +289 0xCC440774, 0xCD866D43, 0xCFC0D31A, 0xCE02B92D, +290 0x91AF9640, 0x906DFC77, 0x922B422E, 0x93E92819, +291 0x96A63E9C, 0x976454AB, 0x9522EAF2, 0x94E080C5, +292 0x9FBCC7F8, 0x9E7EADCF, 0x9C381396, 0x9DFA79A1, +293 0x98B56F24, 0x99770513, 0x9B31BB4A, 0x9AF3D17D, +294 0x8D893530, 0x8C4B5F07, 0x8E0DE15E, 0x8FCF8B69, +295 0x8A809DEC, 0x8B42F7DB, 0x89044982, 0x88C623B5, +296 0x839A6488, 0x82580EBF, 0x801EB0E6, 0x81DCDAD1, +297 0x8493CC54, 0x8551A663, 0x8717183A, 0x86D5720D, +298 0xA9E2D0A0, 0xA820BA97, 0xAA6604CE, 0xABA46EF9, 
+299 0xAEEB787C, 0xAF29124B, 0xAD6FAC12, 0xACADC625, +300 0xA7F18118, 0xA633EB2F, 0xA4755576, 0xA5B73F41, +301 0xA0F829C4, 0xA13A43F3, 0xA37CFDAA, 0xA2BE979D, +302 0xB5C473D0, 0xB40619E7, 0xB640A7BE, 0xB782CD89, +303 0xB2CDDB0C, 0xB30FB13B, 0xB1490F62, 0xB08B6555, +304 0xBBD72268, 0xBA15485F, 0xB853F606, 0xB9919C31, +305 0xBCDE8AB4, 0xBD1CE083, 0xBF5A5EDA, 0xBE9834ED, +306 /* T8_3 */ +307 0x00000000, 0xB8BC6765, 0xAA09C88B, 0x12B5AFEE, +308 0x8F629757, 0x37DEF032, 0x256B5FDC, 0x9DD738B9, +309 0xC5B428EF, 0x7D084F8A, 0x6FBDE064, 0xD7018701, +310 0x4AD6BFB8, 0xF26AD8DD, 0xE0DF7733, 0x58631056, +311 0x5019579F, 0xE8A530FA, 0xFA109F14, 0x42ACF871, +312 0xDF7BC0C8, 0x67C7A7AD, 0x75720843, 0xCDCE6F26, +313 0x95AD7F70, 0x2D111815, 0x3FA4B7FB, 0x8718D09E, +314 0x1ACFE827, 0xA2738F42, 0xB0C620AC, 0x087A47C9, +315 0xA032AF3E, 0x188EC85B, 0x0A3B67B5, 0xB28700D0, +316 0x2F503869, 0x97EC5F0C, 0x8559F0E2, 0x3DE59787, +317 0x658687D1, 0xDD3AE0B4, 0xCF8F4F5A, 0x7733283F, +318 0xEAE41086, 0x525877E3, 0x40EDD80D, 0xF851BF68, +319 0xF02BF8A1, 0x48979FC4, 0x5A22302A, 0xE29E574F, +320 0x7F496FF6, 0xC7F50893, 0xD540A77D, 0x6DFCC018, +321 0x359FD04E, 0x8D23B72B, 0x9F9618C5, 0x272A7FA0, +322 0xBAFD4719, 0x0241207C, 0x10F48F92, 0xA848E8F7, +323 0x9B14583D, 0x23A83F58, 0x311D90B6, 0x89A1F7D3, +324 0x1476CF6A, 0xACCAA80F, 0xBE7F07E1, 0x06C36084, +325 0x5EA070D2, 0xE61C17B7, 0xF4A9B859, 0x4C15DF3C, +326 0xD1C2E785, 0x697E80E0, 0x7BCB2F0E, 0xC377486B, +327 0xCB0D0FA2, 0x73B168C7, 0x6104C729, 0xD9B8A04C, +328 0x446F98F5, 0xFCD3FF90, 0xEE66507E, 0x56DA371B, +329 0x0EB9274D, 0xB6054028, 0xA4B0EFC6, 0x1C0C88A3, +330 0x81DBB01A, 0x3967D77F, 0x2BD27891, 0x936E1FF4, +331 0x3B26F703, 0x839A9066, 0x912F3F88, 0x299358ED, +332 0xB4446054, 0x0CF80731, 0x1E4DA8DF, 0xA6F1CFBA, +333 0xFE92DFEC, 0x462EB889, 0x549B1767, 0xEC277002, +334 0x71F048BB, 0xC94C2FDE, 0xDBF98030, 0x6345E755, +335 0x6B3FA09C, 0xD383C7F9, 0xC1366817, 0x798A0F72, +336 0xE45D37CB, 0x5CE150AE, 0x4E54FF40, 0xF6E89825, +337 0xAE8B8873, 
0x1637EF16, 0x048240F8, 0xBC3E279D, +338 0x21E91F24, 0x99557841, 0x8BE0D7AF, 0x335CB0CA, +339 0xED59B63B, 0x55E5D15E, 0x47507EB0, 0xFFEC19D5, +340 0x623B216C, 0xDA874609, 0xC832E9E7, 0x708E8E82, +341 0x28ED9ED4, 0x9051F9B1, 0x82E4565F, 0x3A58313A, +342 0xA78F0983, 0x1F336EE6, 0x0D86C108, 0xB53AA66D, +343 0xBD40E1A4, 0x05FC86C1, 0x1749292F, 0xAFF54E4A, +344 0x322276F3, 0x8A9E1196, 0x982BBE78, 0x2097D91D, +345 0x78F4C94B, 0xC048AE2E, 0xD2FD01C0, 0x6A4166A5, +346 0xF7965E1C, 0x4F2A3979, 0x5D9F9697, 0xE523F1F2, +347 0x4D6B1905, 0xF5D77E60, 0xE762D18E, 0x5FDEB6EB, +348 0xC2098E52, 0x7AB5E937, 0x680046D9, 0xD0BC21BC, +349 0x88DF31EA, 0x3063568F, 0x22D6F961, 0x9A6A9E04, +350 0x07BDA6BD, 0xBF01C1D8, 0xADB46E36, 0x15080953, +351 0x1D724E9A, 0xA5CE29FF, 0xB77B8611, 0x0FC7E174, +352 0x9210D9CD, 0x2AACBEA8, 0x38191146, 0x80A57623, +353 0xD8C66675, 0x607A0110, 0x72CFAEFE, 0xCA73C99B, +354 0x57A4F122, 0xEF189647, 0xFDAD39A9, 0x45115ECC, +355 0x764DEE06, 0xCEF18963, 0xDC44268D, 0x64F841E8, +356 0xF92F7951, 0x41931E34, 0x5326B1DA, 0xEB9AD6BF, +357 0xB3F9C6E9, 0x0B45A18C, 0x19F00E62, 0xA14C6907, +358 0x3C9B51BE, 0x842736DB, 0x96929935, 0x2E2EFE50, +359 0x2654B999, 0x9EE8DEFC, 0x8C5D7112, 0x34E11677, +360 0xA9362ECE, 0x118A49AB, 0x033FE645, 0xBB838120, +361 0xE3E09176, 0x5B5CF613, 0x49E959FD, 0xF1553E98, +362 0x6C820621, 0xD43E6144, 0xC68BCEAA, 0x7E37A9CF, +363 0xD67F4138, 0x6EC3265D, 0x7C7689B3, 0xC4CAEED6, +364 0x591DD66F, 0xE1A1B10A, 0xF3141EE4, 0x4BA87981, +365 0x13CB69D7, 0xAB770EB2, 0xB9C2A15C, 0x017EC639, +366 0x9CA9FE80, 0x241599E5, 0x36A0360B, 0x8E1C516E, +367 0x866616A7, 0x3EDA71C2, 0x2C6FDE2C, 0x94D3B949, +368 0x090481F0, 0xB1B8E695, 0xA30D497B, 0x1BB12E1E, +369 0x43D23E48, 0xFB6E592D, 0xE9DBF6C3, 0x516791A6, +370 0xCCB0A91F, 0x740CCE7A, 0x66B96194, 0xDE0506F1, +371 /* T8_4 */ +372 0x00000000, 0x3D6029B0, 0x7AC05360, 0x47A07AD0, +373 0xF580A6C0, 0xC8E08F70, 0x8F40F5A0, 0xB220DC10, +374 0x30704BC1, 0x0D106271, 0x4AB018A1, 0x77D03111, +375 0xC5F0ED01, 0xF890C4B1, 
0xBF30BE61, 0x825097D1, +376 0x60E09782, 0x5D80BE32, 0x1A20C4E2, 0x2740ED52, +377 0x95603142, 0xA80018F2, 0xEFA06222, 0xD2C04B92, +378 0x5090DC43, 0x6DF0F5F3, 0x2A508F23, 0x1730A693, +379 0xA5107A83, 0x98705333, 0xDFD029E3, 0xE2B00053, +380 0xC1C12F04, 0xFCA106B4, 0xBB017C64, 0x866155D4, +381 0x344189C4, 0x0921A074, 0x4E81DAA4, 0x73E1F314, +382 0xF1B164C5, 0xCCD14D75, 0x8B7137A5, 0xB6111E15, +383 0x0431C205, 0x3951EBB5, 0x7EF19165, 0x4391B8D5, +384 0xA121B886, 0x9C419136, 0xDBE1EBE6, 0xE681C256, +385 0x54A11E46, 0x69C137F6, 0x2E614D26, 0x13016496, +386 0x9151F347, 0xAC31DAF7, 0xEB91A027, 0xD6F18997, +387 0x64D15587, 0x59B17C37, 0x1E1106E7, 0x23712F57, +388 0x58F35849, 0x659371F9, 0x22330B29, 0x1F532299, +389 0xAD73FE89, 0x9013D739, 0xD7B3ADE9, 0xEAD38459, +390 0x68831388, 0x55E33A38, 0x124340E8, 0x2F236958, +391 0x9D03B548, 0xA0639CF8, 0xE7C3E628, 0xDAA3CF98, +392 0x3813CFCB, 0x0573E67B, 0x42D39CAB, 0x7FB3B51B, +393 0xCD93690B, 0xF0F340BB, 0xB7533A6B, 0x8A3313DB, +394 0x0863840A, 0x3503ADBA, 0x72A3D76A, 0x4FC3FEDA, +395 0xFDE322CA, 0xC0830B7A, 0x872371AA, 0xBA43581A, +396 0x9932774D, 0xA4525EFD, 0xE3F2242D, 0xDE920D9D, +397 0x6CB2D18D, 0x51D2F83D, 0x167282ED, 0x2B12AB5D, +398 0xA9423C8C, 0x9422153C, 0xD3826FEC, 0xEEE2465C, +399 0x5CC29A4C, 0x61A2B3FC, 0x2602C92C, 0x1B62E09C, +400 0xF9D2E0CF, 0xC4B2C97F, 0x8312B3AF, 0xBE729A1F, +401 0x0C52460F, 0x31326FBF, 0x7692156F, 0x4BF23CDF, +402 0xC9A2AB0E, 0xF4C282BE, 0xB362F86E, 0x8E02D1DE, +403 0x3C220DCE, 0x0142247E, 0x46E25EAE, 0x7B82771E, +404 0xB1E6B092, 0x8C869922, 0xCB26E3F2, 0xF646CA42, +405 0x44661652, 0x79063FE2, 0x3EA64532, 0x03C66C82, +406 0x8196FB53, 0xBCF6D2E3, 0xFB56A833, 0xC6368183, +407 0x74165D93, 0x49767423, 0x0ED60EF3, 0x33B62743, +408 0xD1062710, 0xEC660EA0, 0xABC67470, 0x96A65DC0, +409 0x248681D0, 0x19E6A860, 0x5E46D2B0, 0x6326FB00, +410 0xE1766CD1, 0xDC164561, 0x9BB63FB1, 0xA6D61601, +411 0x14F6CA11, 0x2996E3A1, 0x6E369971, 0x5356B0C1, +412 0x70279F96, 0x4D47B626, 0x0AE7CCF6, 0x3787E546, +413 
0x85A73956, 0xB8C710E6, 0xFF676A36, 0xC2074386, +414 0x4057D457, 0x7D37FDE7, 0x3A978737, 0x07F7AE87, +415 0xB5D77297, 0x88B75B27, 0xCF1721F7, 0xF2770847, +416 0x10C70814, 0x2DA721A4, 0x6A075B74, 0x576772C4, +417 0xE547AED4, 0xD8278764, 0x9F87FDB4, 0xA2E7D404, +418 0x20B743D5, 0x1DD76A65, 0x5A7710B5, 0x67173905, +419 0xD537E515, 0xE857CCA5, 0xAFF7B675, 0x92979FC5, +420 0xE915E8DB, 0xD475C16B, 0x93D5BBBB, 0xAEB5920B, +421 0x1C954E1B, 0x21F567AB, 0x66551D7B, 0x5B3534CB, +422 0xD965A31A, 0xE4058AAA, 0xA3A5F07A, 0x9EC5D9CA, +423 0x2CE505DA, 0x11852C6A, 0x562556BA, 0x6B457F0A, +424 0x89F57F59, 0xB49556E9, 0xF3352C39, 0xCE550589, +425 0x7C75D999, 0x4115F029, 0x06B58AF9, 0x3BD5A349, +426 0xB9853498, 0x84E51D28, 0xC34567F8, 0xFE254E48, +427 0x4C059258, 0x7165BBE8, 0x36C5C138, 0x0BA5E888, +428 0x28D4C7DF, 0x15B4EE6F, 0x521494BF, 0x6F74BD0F, +429 0xDD54611F, 0xE03448AF, 0xA794327F, 0x9AF41BCF, +430 0x18A48C1E, 0x25C4A5AE, 0x6264DF7E, 0x5F04F6CE, +431 0xED242ADE, 0xD044036E, 0x97E479BE, 0xAA84500E, +432 0x4834505D, 0x755479ED, 0x32F4033D, 0x0F942A8D, +433 0xBDB4F69D, 0x80D4DF2D, 0xC774A5FD, 0xFA148C4D, +434 0x78441B9C, 0x4524322C, 0x028448FC, 0x3FE4614C, +435 0x8DC4BD5C, 0xB0A494EC, 0xF704EE3C, 0xCA64C78C, +436 /* T8_5 */ +437 0x00000000, 0xCB5CD3A5, 0x4DC8A10B, 0x869472AE, +438 0x9B914216, 0x50CD91B3, 0xD659E31D, 0x1D0530B8, +439 0xEC53826D, 0x270F51C8, 0xA19B2366, 0x6AC7F0C3, +440 0x77C2C07B, 0xBC9E13DE, 0x3A0A6170, 0xF156B2D5, +441 0x03D6029B, 0xC88AD13E, 0x4E1EA390, 0x85427035, +442 0x9847408D, 0x531B9328, 0xD58FE186, 0x1ED33223, +443 0xEF8580F6, 0x24D95353, 0xA24D21FD, 0x6911F258, +444 0x7414C2E0, 0xBF481145, 0x39DC63EB, 0xF280B04E, +445 0x07AC0536, 0xCCF0D693, 0x4A64A43D, 0x81387798, +446 0x9C3D4720, 0x57619485, 0xD1F5E62B, 0x1AA9358E, +447 0xEBFF875B, 0x20A354FE, 0xA6372650, 0x6D6BF5F5, +448 0x706EC54D, 0xBB3216E8, 0x3DA66446, 0xF6FAB7E3, +449 0x047A07AD, 0xCF26D408, 0x49B2A6A6, 0x82EE7503, +450 0x9FEB45BB, 0x54B7961E, 0xD223E4B0, 0x197F3715, +451 0xE82985C0, 
0x23755665, 0xA5E124CB, 0x6EBDF76E, +452 0x73B8C7D6, 0xB8E41473, 0x3E7066DD, 0xF52CB578, +453 0x0F580A6C, 0xC404D9C9, 0x4290AB67, 0x89CC78C2, +454 0x94C9487A, 0x5F959BDF, 0xD901E971, 0x125D3AD4, +455 0xE30B8801, 0x28575BA4, 0xAEC3290A, 0x659FFAAF, +456 0x789ACA17, 0xB3C619B2, 0x35526B1C, 0xFE0EB8B9, +457 0x0C8E08F7, 0xC7D2DB52, 0x4146A9FC, 0x8A1A7A59, +458 0x971F4AE1, 0x5C439944, 0xDAD7EBEA, 0x118B384F, +459 0xE0DD8A9A, 0x2B81593F, 0xAD152B91, 0x6649F834, +460 0x7B4CC88C, 0xB0101B29, 0x36846987, 0xFDD8BA22, +461 0x08F40F5A, 0xC3A8DCFF, 0x453CAE51, 0x8E607DF4, +462 0x93654D4C, 0x58399EE9, 0xDEADEC47, 0x15F13FE2, +463 0xE4A78D37, 0x2FFB5E92, 0xA96F2C3C, 0x6233FF99, +464 0x7F36CF21, 0xB46A1C84, 0x32FE6E2A, 0xF9A2BD8F, +465 0x0B220DC1, 0xC07EDE64, 0x46EAACCA, 0x8DB67F6F, +466 0x90B34FD7, 0x5BEF9C72, 0xDD7BEEDC, 0x16273D79, +467 0xE7718FAC, 0x2C2D5C09, 0xAAB92EA7, 0x61E5FD02, +468 0x7CE0CDBA, 0xB7BC1E1F, 0x31286CB1, 0xFA74BF14, +469 0x1EB014D8, 0xD5ECC77D, 0x5378B5D3, 0x98246676, +470 0x852156CE, 0x4E7D856B, 0xC8E9F7C5, 0x03B52460, +471 0xF2E396B5, 0x39BF4510, 0xBF2B37BE, 0x7477E41B, +472 0x6972D4A3, 0xA22E0706, 0x24BA75A8, 0xEFE6A60D, +473 0x1D661643, 0xD63AC5E6, 0x50AEB748, 0x9BF264ED, +474 0x86F75455, 0x4DAB87F0, 0xCB3FF55E, 0x006326FB, +475 0xF135942E, 0x3A69478B, 0xBCFD3525, 0x77A1E680, +476 0x6AA4D638, 0xA1F8059D, 0x276C7733, 0xEC30A496, +477 0x191C11EE, 0xD240C24B, 0x54D4B0E5, 0x9F886340, +478 0x828D53F8, 0x49D1805D, 0xCF45F2F3, 0x04192156, +479 0xF54F9383, 0x3E134026, 0xB8873288, 0x73DBE12D, +480 0x6EDED195, 0xA5820230, 0x2316709E, 0xE84AA33B, +481 0x1ACA1375, 0xD196C0D0, 0x5702B27E, 0x9C5E61DB, +482 0x815B5163, 0x4A0782C6, 0xCC93F068, 0x07CF23CD, +483 0xF6999118, 0x3DC542BD, 0xBB513013, 0x700DE3B6, +484 0x6D08D30E, 0xA65400AB, 0x20C07205, 0xEB9CA1A0, +485 0x11E81EB4, 0xDAB4CD11, 0x5C20BFBF, 0x977C6C1A, +486 0x8A795CA2, 0x41258F07, 0xC7B1FDA9, 0x0CED2E0C, +487 0xFDBB9CD9, 0x36E74F7C, 0xB0733DD2, 0x7B2FEE77, +488 0x662ADECF, 0xAD760D6A, 0x2BE27FC4, 0xE0BEAC61, 
+489 0x123E1C2F, 0xD962CF8A, 0x5FF6BD24, 0x94AA6E81, +490 0x89AF5E39, 0x42F38D9C, 0xC467FF32, 0x0F3B2C97, +491 0xFE6D9E42, 0x35314DE7, 0xB3A53F49, 0x78F9ECEC, +492 0x65FCDC54, 0xAEA00FF1, 0x28347D5F, 0xE368AEFA, +493 0x16441B82, 0xDD18C827, 0x5B8CBA89, 0x90D0692C, +494 0x8DD55994, 0x46898A31, 0xC01DF89F, 0x0B412B3A, +495 0xFA1799EF, 0x314B4A4A, 0xB7DF38E4, 0x7C83EB41, +496 0x6186DBF9, 0xAADA085C, 0x2C4E7AF2, 0xE712A957, +497 0x15921919, 0xDECECABC, 0x585AB812, 0x93066BB7, +498 0x8E035B0F, 0x455F88AA, 0xC3CBFA04, 0x089729A1, +499 0xF9C19B74, 0x329D48D1, 0xB4093A7F, 0x7F55E9DA, +500 0x6250D962, 0xA90C0AC7, 0x2F987869, 0xE4C4ABCC, +501 /* T8_6 */ +502 0x00000000, 0xA6770BB4, 0x979F1129, 0x31E81A9D, +503 0xF44F2413, 0x52382FA7, 0x63D0353A, 0xC5A73E8E, +504 0x33EF4E67, 0x959845D3, 0xA4705F4E, 0x020754FA, +505 0xC7A06A74, 0x61D761C0, 0x503F7B5D, 0xF64870E9, +506 0x67DE9CCE, 0xC1A9977A, 0xF0418DE7, 0x56368653, +507 0x9391B8DD, 0x35E6B369, 0x040EA9F4, 0xA279A240, +508 0x5431D2A9, 0xF246D91D, 0xC3AEC380, 0x65D9C834, +509 0xA07EF6BA, 0x0609FD0E, 0x37E1E793, 0x9196EC27, +510 0xCFBD399C, 0x69CA3228, 0x582228B5, 0xFE552301, +511 0x3BF21D8F, 0x9D85163B, 0xAC6D0CA6, 0x0A1A0712, +512 0xFC5277FB, 0x5A257C4F, 0x6BCD66D2, 0xCDBA6D66, +513 0x081D53E8, 0xAE6A585C, 0x9F8242C1, 0x39F54975, +514 0xA863A552, 0x0E14AEE6, 0x3FFCB47B, 0x998BBFCF, +515 0x5C2C8141, 0xFA5B8AF5, 0xCBB39068, 0x6DC49BDC, +516 0x9B8CEB35, 0x3DFBE081, 0x0C13FA1C, 0xAA64F1A8, +517 0x6FC3CF26, 0xC9B4C492, 0xF85CDE0F, 0x5E2BD5BB, +518 0x440B7579, 0xE27C7ECD, 0xD3946450, 0x75E36FE4, +519 0xB044516A, 0x16335ADE, 0x27DB4043, 0x81AC4BF7, +520 0x77E43B1E, 0xD19330AA, 0xE07B2A37, 0x460C2183, +521 0x83AB1F0D, 0x25DC14B9, 0x14340E24, 0xB2430590, +522 0x23D5E9B7, 0x85A2E203, 0xB44AF89E, 0x123DF32A, +523 0xD79ACDA4, 0x71EDC610, 0x4005DC8D, 0xE672D739, +524 0x103AA7D0, 0xB64DAC64, 0x87A5B6F9, 0x21D2BD4D, +525 0xE47583C3, 0x42028877, 0x73EA92EA, 0xD59D995E, +526 0x8BB64CE5, 0x2DC14751, 0x1C295DCC, 0xBA5E5678, +527 0x7FF968F6, 
0xD98E6342, 0xE86679DF, 0x4E11726B, +528 0xB8590282, 0x1E2E0936, 0x2FC613AB, 0x89B1181F, +529 0x4C162691, 0xEA612D25, 0xDB8937B8, 0x7DFE3C0C, +530 0xEC68D02B, 0x4A1FDB9F, 0x7BF7C102, 0xDD80CAB6, +531 0x1827F438, 0xBE50FF8C, 0x8FB8E511, 0x29CFEEA5, +532 0xDF879E4C, 0x79F095F8, 0x48188F65, 0xEE6F84D1, +533 0x2BC8BA5F, 0x8DBFB1EB, 0xBC57AB76, 0x1A20A0C2, +534 0x8816EAF2, 0x2E61E146, 0x1F89FBDB, 0xB9FEF06F, +535 0x7C59CEE1, 0xDA2EC555, 0xEBC6DFC8, 0x4DB1D47C, +536 0xBBF9A495, 0x1D8EAF21, 0x2C66B5BC, 0x8A11BE08, +537 0x4FB68086, 0xE9C18B32, 0xD82991AF, 0x7E5E9A1B, +538 0xEFC8763C, 0x49BF7D88, 0x78576715, 0xDE206CA1, +539 0x1B87522F, 0xBDF0599B, 0x8C184306, 0x2A6F48B2, +540 0xDC27385B, 0x7A5033EF, 0x4BB82972, 0xEDCF22C6, +541 0x28681C48, 0x8E1F17FC, 0xBFF70D61, 0x198006D5, +542 0x47ABD36E, 0xE1DCD8DA, 0xD034C247, 0x7643C9F3, +543 0xB3E4F77D, 0x1593FCC9, 0x247BE654, 0x820CEDE0, +544 0x74449D09, 0xD23396BD, 0xE3DB8C20, 0x45AC8794, +545 0x800BB91A, 0x267CB2AE, 0x1794A833, 0xB1E3A387, +546 0x20754FA0, 0x86024414, 0xB7EA5E89, 0x119D553D, +547 0xD43A6BB3, 0x724D6007, 0x43A57A9A, 0xE5D2712E, +548 0x139A01C7, 0xB5ED0A73, 0x840510EE, 0x22721B5A, +549 0xE7D525D4, 0x41A22E60, 0x704A34FD, 0xD63D3F49, +550 0xCC1D9F8B, 0x6A6A943F, 0x5B828EA2, 0xFDF58516, +551 0x3852BB98, 0x9E25B02C, 0xAFCDAAB1, 0x09BAA105, +552 0xFFF2D1EC, 0x5985DA58, 0x686DC0C5, 0xCE1ACB71, +553 0x0BBDF5FF, 0xADCAFE4B, 0x9C22E4D6, 0x3A55EF62, +554 0xABC30345, 0x0DB408F1, 0x3C5C126C, 0x9A2B19D8, +555 0x5F8C2756, 0xF9FB2CE2, 0xC813367F, 0x6E643DCB, +556 0x982C4D22, 0x3E5B4696, 0x0FB35C0B, 0xA9C457BF, +557 0x6C636931, 0xCA146285, 0xFBFC7818, 0x5D8B73AC, +558 0x03A0A617, 0xA5D7ADA3, 0x943FB73E, 0x3248BC8A, +559 0xF7EF8204, 0x519889B0, 0x6070932D, 0xC6079899, +560 0x304FE870, 0x9638E3C4, 0xA7D0F959, 0x01A7F2ED, +561 0xC400CC63, 0x6277C7D7, 0x539FDD4A, 0xF5E8D6FE, +562 0x647E3AD9, 0xC209316D, 0xF3E12BF0, 0x55962044, +563 0x90311ECA, 0x3646157E, 0x07AE0FE3, 0xA1D90457, +564 0x579174BE, 0xF1E67F0A, 0xC00E6597, 0x66796E23, 
+565 0xA3DE50AD, 0x05A95B19, 0x34414184, 0x92364A30, +566 /* T8_7 */ +567 0x00000000, 0xCCAA009E, 0x4225077D, 0x8E8F07E3, +568 0x844A0EFA, 0x48E00E64, 0xC66F0987, 0x0AC50919, +569 0xD3E51BB5, 0x1F4F1B2B, 0x91C01CC8, 0x5D6A1C56, +570 0x57AF154F, 0x9B0515D1, 0x158A1232, 0xD92012AC, +571 0x7CBB312B, 0xB01131B5, 0x3E9E3656, 0xF23436C8, +572 0xF8F13FD1, 0x345B3F4F, 0xBAD438AC, 0x767E3832, +573 0xAF5E2A9E, 0x63F42A00, 0xED7B2DE3, 0x21D12D7D, +574 0x2B142464, 0xE7BE24FA, 0x69312319, 0xA59B2387, +575 0xF9766256, 0x35DC62C8, 0xBB53652B, 0x77F965B5, +576 0x7D3C6CAC, 0xB1966C32, 0x3F196BD1, 0xF3B36B4F, +577 0x2A9379E3, 0xE639797D, 0x68B67E9E, 0xA41C7E00, +578 0xAED97719, 0x62737787, 0xECFC7064, 0x205670FA, +579 0x85CD537D, 0x496753E3, 0xC7E85400, 0x0B42549E, +580 0x01875D87, 0xCD2D5D19, 0x43A25AFA, 0x8F085A64, +581 0x562848C8, 0x9A824856, 0x140D4FB5, 0xD8A74F2B, +582 0xD2624632, 0x1EC846AC, 0x9047414F, 0x5CED41D1, +583 0x299DC2ED, 0xE537C273, 0x6BB8C590, 0xA712C50E, +584 0xADD7CC17, 0x617DCC89, 0xEFF2CB6A, 0x2358CBF4, +585 0xFA78D958, 0x36D2D9C6, 0xB85DDE25, 0x74F7DEBB, +586 0x7E32D7A2, 0xB298D73C, 0x3C17D0DF, 0xF0BDD041, +587 0x5526F3C6, 0x998CF358, 0x1703F4BB, 0xDBA9F425, +588 0xD16CFD3C, 0x1DC6FDA2, 0x9349FA41, 0x5FE3FADF, +589 0x86C3E873, 0x4A69E8ED, 0xC4E6EF0E, 0x084CEF90, +590 0x0289E689, 0xCE23E617, 0x40ACE1F4, 0x8C06E16A, +591 0xD0EBA0BB, 0x1C41A025, 0x92CEA7C6, 0x5E64A758, +592 0x54A1AE41, 0x980BAEDF, 0x1684A93C, 0xDA2EA9A2, +593 0x030EBB0E, 0xCFA4BB90, 0x412BBC73, 0x8D81BCED, +594 0x8744B5F4, 0x4BEEB56A, 0xC561B289, 0x09CBB217, +595 0xAC509190, 0x60FA910E, 0xEE7596ED, 0x22DF9673, +596 0x281A9F6A, 0xE4B09FF4, 0x6A3F9817, 0xA6959889, +597 0x7FB58A25, 0xB31F8ABB, 0x3D908D58, 0xF13A8DC6, +598 0xFBFF84DF, 0x37558441, 0xB9DA83A2, 0x7570833C, +599 0x533B85DA, 0x9F918544, 0x111E82A7, 0xDDB48239, +600 0xD7718B20, 0x1BDB8BBE, 0x95548C5D, 0x59FE8CC3, +601 0x80DE9E6F, 0x4C749EF1, 0xC2FB9912, 0x0E51998C, +602 0x04949095, 0xC83E900B, 0x46B197E8, 0x8A1B9776, +603 0x2F80B4F1, 
0xE32AB46F, 0x6DA5B38C, 0xA10FB312, +604 0xABCABA0B, 0x6760BA95, 0xE9EFBD76, 0x2545BDE8, +605 0xFC65AF44, 0x30CFAFDA, 0xBE40A839, 0x72EAA8A7, +606 0x782FA1BE, 0xB485A120, 0x3A0AA6C3, 0xF6A0A65D, +607 0xAA4DE78C, 0x66E7E712, 0xE868E0F1, 0x24C2E06F, +608 0x2E07E976, 0xE2ADE9E8, 0x6C22EE0B, 0xA088EE95, +609 0x79A8FC39, 0xB502FCA7, 0x3B8DFB44, 0xF727FBDA, +610 0xFDE2F2C3, 0x3148F25D, 0xBFC7F5BE, 0x736DF520, +611 0xD6F6D6A7, 0x1A5CD639, 0x94D3D1DA, 0x5879D144, +612 0x52BCD85D, 0x9E16D8C3, 0x1099DF20, 0xDC33DFBE, +613 0x0513CD12, 0xC9B9CD8C, 0x4736CA6F, 0x8B9CCAF1, +614 0x8159C3E8, 0x4DF3C376, 0xC37CC495, 0x0FD6C40B, +615 0x7AA64737, 0xB60C47A9, 0x3883404A, 0xF42940D4, +616 0xFEEC49CD, 0x32464953, 0xBCC94EB0, 0x70634E2E, +617 0xA9435C82, 0x65E95C1C, 0xEB665BFF, 0x27CC5B61, +618 0x2D095278, 0xE1A352E6, 0x6F2C5505, 0xA386559B, +619 0x061D761C, 0xCAB77682, 0x44387161, 0x889271FF, +620 0x825778E6, 0x4EFD7878, 0xC0727F9B, 0x0CD87F05, +621 0xD5F86DA9, 0x19526D37, 0x97DD6AD4, 0x5B776A4A, +622 0x51B26353, 0x9D1863CD, 0x1397642E, 0xDF3D64B0, +623 0x83D02561, 0x4F7A25FF, 0xC1F5221C, 0x0D5F2282, +624 0x079A2B9B, 0xCB302B05, 0x45BF2CE6, 0x89152C78, +625 0x50353ED4, 0x9C9F3E4A, 0x121039A9, 0xDEBA3937, +626 0xD47F302E, 0x18D530B0, 0x965A3753, 0x5AF037CD, +627 0xFF6B144A, 0x33C114D4, 0xBD4E1337, 0x71E413A9, +628 0x7B211AB0, 0xB78B1A2E, 0x39041DCD, 0xF5AE1D53, +629 0x2C8E0FFF, 0xE0240F61, 0x6EAB0882, 0xA201081C, +630 0xA8C40105, 0x646E019B, 0xEAE10678, 0x264B06E6 +631 }; +632} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017/* +018 * Some portions of this file Copyright (c) 2004-2006 Intel Corportation +019 * and licensed under the BSD license. +020 */ +021package org.apache.commons.codec.digest; +022 +023import java.util.zip.Checksum; +024 +025/** +026 * A pure-java implementation of the CRC32 checksum that uses +027 * the CRC32-C polynomial, the same polynomial used by iSCSI +028 * and implemented on many Intel chipsets supporting SSE4.2. +029 * +030 * Copied from Hadoop 2.3.6: +031 * https://git-wip-us.apache.org/repos/asf?p=hadoop.git;a=blob_plain; +032 * f=hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java; +033 * hb=2120de588b92b9f22b1cc4188761d6a8c61aa778 +034 * <p> +035 * This class is Not ThreadSafe +036 * @since 1.11 +037 */ +038public class PureJavaCrc32C implements Checksum { +039 +040 /** the current CRC value, bit-flipped */ +041 private int crc; +042 +043 /** Create a new PureJavaCrc32 object. 
*/ +044 public PureJavaCrc32C() { +045 reset(); +046 } +047 +048 @Override +049 public long getValue() { +050 final long ret = crc; +051 return (~ret) & 0xffffffffL; +052 } +053 +054 @Override +055 public void reset() { +056 crc = 0xffffffff; +057 } +058 +059 @Override +060 public void update(final byte[] b, int off, int len) { +061 int localCrc = crc; +062 +063 while(len > 7) { +064 final int c0 =(b[off+0] ^ localCrc) & 0xff; +065 final int c1 =(b[off+1] ^ (localCrc >>>= 8)) & 0xff; +066 final int c2 =(b[off+2] ^ (localCrc >>>= 8)) & 0xff; +067 final int c3 =(b[off+3] ^ (localCrc >>>= 8)) & 0xff; +068 localCrc = (T[T8_7_start + c0] ^ T[T8_6_start + c1]) ^ +069 (T[T8_5_start + c2] ^ T[T8_4_start + c3]); +070 +071 final int c4 = b[off+4] & 0xff; +072 final int c5 = b[off+5] & 0xff; +073 final int c6 = b[off+6] & 0xff; +074 final int c7 = b[off+7] & 0xff; +075 +076 localCrc ^= (T[T8_3_start + c4] ^ T[T8_2_start + c5]) ^ +077 (T[T8_1_start + c6] ^ T[T8_0_start + c7]); +078 +079 off += 8; +080 len -= 8; +081 } +082 +083 /* loop unroll - duff's device style */ +084 switch(len) { +085 case 7: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +086 case 6: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +087 case 5: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +088 case 4: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +089 case 3: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +090 case 2: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +091 case 1: localCrc = (localCrc >>> 8) ^ T[T8_0_start + ((localCrc ^ b[off++]) & 0xff)]; +092 default: +093 break; // satisfy Findbugs +094 } +095 +096 // Publish crc out to object +097 crc = localCrc; +098 } +099 +100 @Override +101 final public void update(final int b) { +102 crc = (crc >>> 8) ^ T[T8_0_start + ((crc ^ b) & 0xff)]; +103 } +104 +105 // CRC 
polynomial tables generated by: +106 // java -cp build/test/classes/:build/classes/ \ +107 // org.apache.hadoop.util.TestPureJavaCrc32\$Table 82F63B78 +108 +109 private static final int T8_0_start = 0*256; +110 private static final int T8_1_start = 1*256; +111 private static final int T8_2_start = 2*256; +112 private static final int T8_3_start = 3*256; +113 private static final int T8_4_start = 4*256; +114 private static final int T8_5_start = 5*256; +115 private static final int T8_6_start = 6*256; +116 private static final int T8_7_start = 7*256; +117 +118 private static final int[] T = new int[] { +119 /* T8_0 */ +120 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, +121 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, +122 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, +123 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, +124 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, +125 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, +126 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, +127 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, +128 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, +129 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, +130 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, +131 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, +132 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, +133 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, +134 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, +135 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, +136 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, +137 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, +138 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, +139 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, +140 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, +141 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, +142 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, +143 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, +144 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, +145 0xA65C047D, 0x5437877E, 0x4767748A, 
0xB50CF789, +146 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, +147 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, +148 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, +149 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, +150 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, +151 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, +152 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, +153 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, +154 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, +155 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, +156 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, +157 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, +158 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, +159 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, +160 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, +161 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, +162 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, +163 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, +164 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, +165 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, +166 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, +167 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, +168 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, +169 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, +170 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, +171 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, +172 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, +173 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, +174 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, +175 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, +176 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, +177 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, +178 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, +179 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, +180 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, +181 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, +182 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, +183 0xBE2DA0A5, 
0x4C4623A6, 0x5F16D052, 0xAD7D5351, +184 /* T8_1 */ +185 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, +186 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, +187 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, +188 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, +189 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, +190 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, +191 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, +192 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, +193 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, +194 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, +195 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, +196 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, +197 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, +198 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, +199 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, +200 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, +201 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, +202 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, +203 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, +204 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, +205 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, +206 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, +207 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, +208 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, +209 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, +210 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, +211 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, +212 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, +213 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, +214 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, +215 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, +216 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, +217 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, +218 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, +219 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, +220 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, +221 0xC4060B78, 0xD7A4930F, 
0xE3433B96, 0xF0E1A3E1, +222 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, +223 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, +224 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, +225 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, +226 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, +227 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, +228 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, +229 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, +230 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, +231 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, +232 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, +233 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, +234 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, +235 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, +236 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, +237 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, +238 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, +239 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, +240 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, +241 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, +242 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, +243 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, +244 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, +245 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, +246 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, +247 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, +248 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483, +249 /* T8_2 */ +250 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, +251 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, +252 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, +253 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, +254 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, +255 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, +256 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, +257 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, +258 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, +259 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 
0x95B49F7D, +260 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, +261 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, +262 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, +263 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, +264 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, +265 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, +266 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, +267 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, +268 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, +269 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, +270 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, +271 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, +272 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, +273 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, +274 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, +275 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, +276 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, +277 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, +278 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, +279 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, +280 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, +281 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, +282 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, +283 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, +284 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, +285 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, +286 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, +287 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, +288 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, +289 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, +290 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, +291 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, +292 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, +293 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, +294 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, +295 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, +296 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, +297 0xBC4E6B02, 
0x190FF97C, 0xF321390F, 0x5660AB71, +298 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, +299 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, +300 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, +301 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, +302 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, +303 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, +304 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, +305 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, +306 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, +307 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, +308 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, +309 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, +310 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, +311 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, +312 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, +313 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8, +314 /* T8_3 */ +315 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, +316 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, +317 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, +318 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, +319 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, +320 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, +321 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, +322 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, +323 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, +324 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, +325 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, +326 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, +327 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, +328 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, +329 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, +330 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, +331 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, +332 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, +333 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, +334 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, +335 0x42C2EEDA, 0x9F874462, 
0xFDA5CD5B, 0x20E067E3, +336 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, +337 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, +338 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, +339 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, +340 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, +341 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, +342 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, +343 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, +344 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, +345 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, +346 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, +347 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, +348 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, +349 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, +350 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, +351 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, +352 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, +353 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, +354 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, +355 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, +356 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, +357 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, +358 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, +359 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, +360 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, +361 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, +362 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, +363 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, +364 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, +365 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, +366 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, +367 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, +368 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, +369 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, +370 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, +371 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, +372 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, +373 
0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, +374 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, +375 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, +376 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, +377 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, +378 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842, +379 /* T8_4 */ +380 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, +381 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, +382 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, +383 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, +384 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, +385 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, +386 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, +387 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, +388 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, +389 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, +390 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, +391 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, +392 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, +393 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, +394 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, +395 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, +396 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, +397 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, +398 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, +399 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, +400 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, +401 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, +402 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, +403 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, +404 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, +405 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, +406 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, +407 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, +408 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, +409 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, +410 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, +411 0x86F90B0B, 
0xBEE864A7, 0xF6DBD453, 0xCECABBFF, +412 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, +413 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, +414 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, +415 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, +416 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, +417 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, +418 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, +419 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, +420 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, +421 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, +422 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, +423 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, +424 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, +425 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, +426 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, +427 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D, +428 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, +429 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, +430 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, +431 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, +432 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, +433 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, +434 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, +435 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, +436 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, +437 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, +438 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, +439 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, +440 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, +441 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, +442 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, +443 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3, +444 /* T8_5 */ +445 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, +446 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, +447 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, +448 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, +449 0xC00C303E, 0x2F3C5B27, 
0x1B8090FD, 0xF4B0FBE4, +450 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, +451 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, +452 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, +453 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, +454 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, +455 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, +456 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, +457 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, +458 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, +459 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, +460 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, +461 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, +462 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, +463 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, +464 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, +465 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, +466 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, +467 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, +468 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, +469 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, +470 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, +471 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, +472 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, +473 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, +474 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, +475 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, +476 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, +477 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, +478 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, +479 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, +480 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, +481 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, +482 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, +483 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, +484 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, +485 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, +486 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, +487 
0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, +488 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, +489 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, +490 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, +491 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, +492 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, +493 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, +494 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, +495 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, +496 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, +497 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, +498 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, +499 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, +500 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, +501 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, +502 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, +503 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, +504 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, +505 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, +506 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, +507 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, +508 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C, +509 /* T8_6 */ +510 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, +511 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, +512 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, +513 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, +514 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, +515 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, +516 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, +517 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, +518 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, +519 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, +520 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, +521 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, +522 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, +523 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, +524 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, +525 0x4ACD4199, 
0x22CE6D51, 0x9ACB1809, 0xF2C834C1, +526 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, +527 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, +528 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, +529 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, +530 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, +531 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, +532 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, +533 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, +534 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, +535 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, +536 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, +537 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, +538 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, +539 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, +540 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, +541 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, +542 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, +543 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, +544 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, +545 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, +546 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, +547 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, +548 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, +549 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, +550 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, +551 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, +552 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, +553 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, +554 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, +555 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, +556 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, +557 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, +558 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, +559 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, +560 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, +561 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, +562 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 
+563 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, +564 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, +565 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, +566 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, +567 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, +568 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, +569 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, +570 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, +571 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, +572 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, +573 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F, +574 /* T8_7 */ +575 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, +576 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, +577 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, +578 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, +579 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, +580 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, +581 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, +582 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, +583 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, +584 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, +585 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, +586 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, +587 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, +588 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, +589 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, +590 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, +591 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, +592 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, +593 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, +594 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, +595 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, +596 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, +597 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, +598 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, +599 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, +600 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, +601 0x553BAA71, 
0x1C07D756, 0xC743503F, 0x8E7F2D18, +602 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, +603 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, +604 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, +605 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, +606 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, +607 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, +608 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, +609 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, +610 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, +611 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, +612 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, +613 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, +614 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, +615 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, +616 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, +617 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, +618 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, +619 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, +620 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, +621 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, +622 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, +623 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, +624 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, +625 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, +626 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, +627 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, +628 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, +629 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, +630 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, +631 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, +632 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, +633 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, +634 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, +635 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, +636 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, +637 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, +638 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 
+639 }; +640} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.digest; +018 +019import java.security.MessageDigest; +020import java.security.NoSuchAlgorithmException; +021import java.security.SecureRandom; +022import java.util.Arrays; +023import java.util.Random; +024import java.util.concurrent.ThreadLocalRandom; +025import java.util.regex.Matcher; +026import java.util.regex.Pattern; +027 +028import org.apache.commons.codec.Charsets; +029 +030/** +031 * SHA2-based Unix crypt implementation. +032 * <p> +033 * Based on the C implementation released into the Public Domain by Ulrich Drepper <drepper@redhat.com> +034 * http://www.akkadia.org/drepper/SHA-crypt.txt +035 * <p> +036 * Conversion to Kotlin and from there to Java in 2012 by Christian Hammers <ch@lathspell.de> and likewise put +037 * into the Public Domain. +038 * <p> +039 * This class is immutable and thread-safe. +040 * +041 * @version $Id$ +042 * @since 1.7 +043 */ +044public class Sha2Crypt { +045 +046 /** Default number of rounds if not explicitly specified. */ +047 private static final int ROUNDS_DEFAULT = 5000; +048 +049 /** Maximum number of rounds. 
*/ +050 private static final int ROUNDS_MAX = 999999999; +051 +052 /** Minimum number of rounds. */ +053 private static final int ROUNDS_MIN = 1000; +054 +055 /** Prefix for optional rounds specification. */ +056 private static final String ROUNDS_PREFIX = "rounds="; +057 +058 /** The number of bytes the final hash value will have (SHA-256 variant). */ +059 private static final int SHA256_BLOCKSIZE = 32; +060 +061 /** The prefixes that can be used to identify this crypt() variant (SHA-256). */ +062 static final String SHA256_PREFIX = "$5$"; +063 +064 /** The number of bytes the final hash value will have (SHA-512 variant). */ +065 private static final int SHA512_BLOCKSIZE = 64; +066 +067 /** The prefixes that can be used to identify this crypt() variant (SHA-512). */ +068 static final String SHA512_PREFIX = "$6$"; +069 +070 /** The pattern to match valid salt values. */ +071 private static final Pattern SALT_PATTERN = Pattern +072 .compile("^\\$([56])\\$(rounds=(\\d+)\\$)?([\\.\\/a-zA-Z0-9]{1,16}).*"); +073 +074 /** +075 * Generates a libc crypt() compatible "$5$" hash value with random salt. +076 * <p> +077 * See {@link Crypt#crypt(String, String)} for details. +078 * </p> +079 * <p> +080 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +081 * {@link SecureRandom} to generate your own salts and calling {@link #sha256Crypt(byte[], String)}. +082 * </p> +083 * +084 * @param keyBytes +085 * plaintext to hash +086 * @return complete hash value +087 * @throws IllegalArgumentException +088 * when a {@link java.security.NoSuchAlgorithmException} is caught. +089 */ +090 public static String sha256Crypt(final byte[] keyBytes) { +091 return sha256Crypt(keyBytes, null); +092 } +093 +094 /** +095 * Generates a libc6 crypt() compatible "$5$" hash value. +096 * <p> +097 * See {@link Crypt#crypt(String, String)} for details. 
+098 * </p> +099 * @param keyBytes +100 * plaintext to hash +101 * @param salt +102 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +103 * you using {@link SecureRandom}. If one does not want to use {@link SecureRandom}, you can pass your +104 * own {@link Random} in {@link #sha256Crypt(byte[], String, Random)}. +105 * @return complete hash value including salt +106 * @throws IllegalArgumentException +107 * if the salt does not match the allowed pattern +108 * @throws IllegalArgumentException +109 * when a {@link java.security.NoSuchAlgorithmException} is caught. +110 */ +111 public static String sha256Crypt(final byte[] keyBytes, String salt) { +112 if (salt == null) { +113 salt = SHA256_PREFIX + B64.getRandomSalt(8); +114 } +115 return sha2Crypt(keyBytes, salt, SHA256_PREFIX, SHA256_BLOCKSIZE, MessageDigestAlgorithms.SHA_256); +116 } +117 +118 /** +119 * Generates a libc6 crypt() compatible "$5$" hash value. +120 * <p> +121 * See {@link Crypt#crypt(String, String)} for details. +122 * </p> +123 * @param keyBytes +124 * plaintext to hash +125 * @param salt +126 * real salt value without prefix or "rounds=". +127 * @param random +128 * the instance of {@link Random} to use for generating the salt. Consider using {@link SecureRandom} +129 * or {@link ThreadLocalRandom}. +130 * @return complete hash value including salt +131 * @throws IllegalArgumentException +132 * if the salt does not match the allowed pattern +133 * @throws IllegalArgumentException +134 * when a {@link java.security.NoSuchAlgorithmException} is caught. 
+135 */ +136 public static String sha256Crypt(final byte[] keyBytes, String salt, Random random) { +137 if (salt == null) { +138 salt = SHA256_PREFIX + B64.getRandomSalt(8, random); +139 } +140 return sha2Crypt(keyBytes, salt, SHA256_PREFIX, SHA256_BLOCKSIZE, MessageDigestAlgorithms.SHA_256); +141 } +142 +143 /** +144 * Generates a libc6 crypt() compatible "$5$" or "$6$" SHA2 based hash value. +145 * <p> +146 * This is a nearly line by line conversion of the original C function. The numbered comments are from the algorithm +147 * description, the short C-style ones from the original C code and the ones with "Remark" from me. +148 * <p> +149 * See {@link Crypt#crypt(String, String)} for details. +150 * +151 * @param keyBytes +152 * plaintext to hash +153 * @param salt +154 * real salt value without prefix or "rounds="; may not be null +155 * @param saltPrefix +156 * either $5$ or $6$ +157 * @param blocksize +158 * a value that differs between $5$ and $6$ +159 * @param algorithm +160 * {@link MessageDigest} algorithm identifier string +161 * @return complete hash value including prefix and salt +162 * @throws IllegalArgumentException +163 * if the given salt is <code>null</code> or does not match the allowed pattern +164 * @throws IllegalArgumentException +165 * when a {@link NoSuchAlgorithmException} is caught +166 * @see MessageDigestAlgorithms +167 */ +168 private static String sha2Crypt(final byte[] keyBytes, final String salt, final String saltPrefix, +169 final int blocksize, final String algorithm) { +170 +171 final int keyLen = keyBytes.length; +172 +173 // Extracts effective salt and the number of rounds from the given salt. 
+174 int rounds = ROUNDS_DEFAULT; +175 boolean roundsCustom = false; +176 if (salt == null) { +177 throw new IllegalArgumentException("Salt must not be null"); +178 } +179 +180 final Matcher m = SALT_PATTERN.matcher(salt); +181 if (!m.find()) { +182 throw new IllegalArgumentException("Invalid salt value: " + salt); +183 } +184 if (m.group(3) != null) { +185 rounds = Integer.parseInt(m.group(3)); +186 rounds = Math.max(ROUNDS_MIN, Math.min(ROUNDS_MAX, rounds)); +187 roundsCustom = true; +188 } +189 final String saltString = m.group(4); +190 final byte[] saltBytes = saltString.getBytes(Charsets.UTF_8); +191 final int saltLen = saltBytes.length; +192 +193 // 1. start digest A +194 // Prepare for the real work. +195 MessageDigest ctx = DigestUtils.getDigest(algorithm); +196 +197 // 2. the password string is added to digest A +198 /* +199 * Add the key string. +200 */ +201 ctx.update(keyBytes); +202 +203 // 3. the salt string is added to digest A. This is just the salt string +204 // itself without the enclosing '$', without the magic salt_prefix $5$ and +205 // $6$ respectively and without the rounds=<N> specification. +206 // +207 // NB: the MD5 algorithm did add the $1$ salt_prefix. This is not deemed +208 // necessary since it is a constant string and does not add security +209 // and /possibly/ allows a plain text attack. Since the rounds=<N> +210 // specification should never be added this would also create an +211 // inconsistency. +212 /* +213 * The last part is the salt string. This must be at most 16 characters and it ends at the first `$' character +214 * (for compatibility with existing implementations). +215 */ +216 ctx.update(saltBytes); +217 +218 // 4. start digest B +219 /* +220 * Compute alternate sha512 sum with input KEY, SALT, and KEY. The final result will be added to the first +221 * context. +222 */ +223 MessageDigest altCtx = DigestUtils.getDigest(algorithm); +224 +225 // 5. add the password to digest B +226 /* +227 * Add key. 
+228 */ +229 altCtx.update(keyBytes); +230 +231 // 6. add the salt string to digest B +232 /* +233 * Add salt. +234 */ +235 altCtx.update(saltBytes); +236 +237 // 7. add the password again to digest B +238 /* +239 * Add key again. +240 */ +241 altCtx.update(keyBytes); +242 +243 // 8. finish digest B +244 /* +245 * Now get result of this (32 bytes) and add it to the other context. +246 */ +247 byte[] altResult = altCtx.digest(); +248 +249 // 9. For each block of 32 or 64 bytes in the password string (excluding +250 // the terminating NUL in the C representation), add digest B to digest A +251 /* +252 * Add for any character in the key one byte of the alternate sum. +253 */ +254 /* +255 * (Remark: the C code comment seems wrong for key length > 32!) +256 */ +257 int cnt = keyBytes.length; +258 while (cnt > blocksize) { +259 ctx.update(altResult, 0, blocksize); +260 cnt -= blocksize; +261 } +262 +263 // 10. For the remaining N bytes of the password string add the first +264 // N bytes of digest B to digest A +265 ctx.update(altResult, 0, cnt); +266 +267 // 11. For each bit of the binary representation of the length of the +268 // password string up to and including the highest 1-digit, starting +269 // from to lowest bit position (numeric value 1): +270 // +271 // a) for a 1-digit add digest B to digest A +272 // +273 // b) for a 0-digit add the password string +274 // +275 // NB: this step differs significantly from the MD5 algorithm. It +276 // adds more randomness. +277 /* +278 * Take the binary representation of the length of the key and for every 1 add the alternate sum, for every 0 +279 * the key. +280 */ +281 cnt = keyBytes.length; +282 while (cnt > 0) { +283 if ((cnt & 1) != 0) { +284 ctx.update(altResult, 0, blocksize); +285 } else { +286 ctx.update(keyBytes); +287 } +288 cnt >>= 1; +289 } +290 +291 // 12. finish digest A +292 /* +293 * Create intermediate result. +294 */ +295 altResult = ctx.digest(); +296 +297 // 13. 
start digest DP +298 /* +299 * Start computation of P byte sequence. +300 */ +301 altCtx = DigestUtils.getDigest(algorithm); +302 +303 // 14. for every byte in the password (excluding the terminating NUL byte +304 // in the C representation of the string) +305 // +306 // add the password to digest DP +307 /* +308 * For every character in the password add the entire password. +309 */ +310 for (int i = 1; i <= keyLen; i++) { +311 altCtx.update(keyBytes); +312 } +313 +314 // 15. finish digest DP +315 /* +316 * Finish the digest. +317 */ +318 byte[] tempResult = altCtx.digest(); +319 +320 // 16. produce byte sequence P of the same length as the password where +321 // +322 // a) for each block of 32 or 64 bytes of length of the password string +323 // the entire digest DP is used +324 // +325 // b) for the remaining N (up to 31 or 63) bytes use the first N +326 // bytes of digest DP +327 /* +328 * Create byte sequence P. +329 */ +330 final byte[] pBytes = new byte[keyLen]; +331 int cp = 0; +332 while (cp < keyLen - blocksize) { +333 System.arraycopy(tempResult, 0, pBytes, cp, blocksize); +334 cp += blocksize; +335 } +336 System.arraycopy(tempResult, 0, pBytes, cp, keyLen - cp); +337 +338 // 17. start digest DS +339 /* +340 * Start computation of S byte sequence. +341 */ +342 altCtx = DigestUtils.getDigest(algorithm); +343 +344 // 18. repeast the following 16+A[0] times, where A[0] represents the first +345 // byte in digest A interpreted as an 8-bit unsigned value +346 // +347 // add the salt to digest DS +348 /* +349 * For every character in the password add the entire password. +350 */ +351 for (int i = 1; i <= 16 + (altResult[0] & 0xff); i++) { +352 altCtx.update(saltBytes); +353 } +354 +355 // 19. finish digest DS +356 /* +357 * Finish the digest. +358 */ +359 tempResult = altCtx.digest(); +360 +361 // 20. 
produce byte sequence S of the same length as the salt string where +362 // +363 // a) for each block of 32 or 64 bytes of length of the salt string +364 // the entire digest DS is used +365 // +366 // b) for the remaining N (up to 31 or 63) bytes use the first N +367 // bytes of digest DS +368 /* +369 * Create byte sequence S. +370 */ +371 // Remark: The salt is limited to 16 chars, how does this make sense? +372 final byte[] sBytes = new byte[saltLen]; +373 cp = 0; +374 while (cp < saltLen - blocksize) { +375 System.arraycopy(tempResult, 0, sBytes, cp, blocksize); +376 cp += blocksize; +377 } +378 System.arraycopy(tempResult, 0, sBytes, cp, saltLen - cp); +379 +380 // 21. repeat a loop according to the number specified in the rounds=<N> +381 // specification in the salt (or the default value if none is +382 // present). Each round is numbered, starting with 0 and up to N-1. +383 // +384 // The loop uses a digest as input. In the first round it is the +385 // digest produced in step 12. In the latter steps it is the digest +386 // produced in step 21.h. The following text uses the notation +387 // "digest A/C" to describe this behavior. +388 /* +389 * Repeatedly run the collected hash value through sha512 to burn CPU cycles. +390 */ +391 for (int i = 0; i <= rounds - 1; i++) { +392 // a) start digest C +393 /* +394 * New context. +395 */ +396 ctx = DigestUtils.getDigest(algorithm); +397 +398 // b) for odd round numbers add the byte sequense P to digest C +399 // c) for even round numbers add digest A/C +400 /* +401 * Add key or last result. +402 */ +403 if ((i & 1) != 0) { +404 ctx.update(pBytes, 0, keyLen); +405 } else { +406 ctx.update(altResult, 0, blocksize); +407 } +408 +409 // d) for all round numbers not divisible by 3 add the byte sequence S +410 /* +411 * Add salt for numbers not divisible by 3. 
+412 */ +413 if (i % 3 != 0) { +414 ctx.update(sBytes, 0, saltLen); +415 } +416 +417 // e) for all round numbers not divisible by 7 add the byte sequence P +418 /* +419 * Add key for numbers not divisible by 7. +420 */ +421 if (i % 7 != 0) { +422 ctx.update(pBytes, 0, keyLen); +423 } +424 +425 // f) for odd round numbers add digest A/C +426 // g) for even round numbers add the byte sequence P +427 /* +428 * Add key or last result. +429 */ +430 if ((i & 1) != 0) { +431 ctx.update(altResult, 0, blocksize); +432 } else { +433 ctx.update(pBytes, 0, keyLen); +434 } +435 +436 // h) finish digest C. +437 /* +438 * Create intermediate result. +439 */ +440 altResult = ctx.digest(); +441 } +442 +443 // 22. Produce the output string. This is an ASCII string of the maximum +444 // size specified above, consisting of multiple pieces: +445 // +446 // a) the salt salt_prefix, $5$ or $6$ respectively +447 // +448 // b) the rounds=<N> specification, if one was present in the input +449 // salt string. A trailing '$' is added in this case to separate +450 // the rounds specification from the following text. +451 // +452 // c) the salt string truncated to 16 characters +453 // +454 // d) a '$' character +455 /* +456 * Now we can construct the result string. It consists of three parts. +457 */ +458 final StringBuilder buffer = new StringBuilder(saltPrefix); +459 if (roundsCustom) { +460 buffer.append(ROUNDS_PREFIX); +461 buffer.append(rounds); +462 buffer.append("$"); +463 } +464 buffer.append(saltString); +465 buffer.append("$"); +466 +467 // e) the base-64 encoded final C digest. The encoding used is as +468 // follows: +469 // [...] +470 // +471 // Each group of three bytes from the digest produces four +472 // characters as output: +473 // +474 // 1. character: the six low bits of the first byte +475 // 2. character: the two high bits of the first byte and the +476 // four low bytes from the second byte +477 // 3. 
character: the four high bytes from the second byte and +478 // the two low bits from the third byte +479 // 4. character: the six high bits from the third byte +480 // +481 // The groups of three bytes are as follows (in this sequence). +482 // These are the indices into the byte array containing the +483 // digest, starting with index 0. For the last group there are +484 // not enough bytes left in the digest and the value zero is used +485 // in its place. This group also produces only three or two +486 // characters as output for SHA-512 and SHA-512 respectively. +487 +488 // This was just a safeguard in the C implementation: +489 // int buflen = salt_prefix.length() - 1 + ROUNDS_PREFIX.length() + 9 + 1 + salt_string.length() + 1 + 86 + 1; +490 +491 if (blocksize == 32) { +492 B64.b64from24bit(altResult[0], altResult[10], altResult[20], 4, buffer); +493 B64.b64from24bit(altResult[21], altResult[1], altResult[11], 4, buffer); +494 B64.b64from24bit(altResult[12], altResult[22], altResult[2], 4, buffer); +495 B64.b64from24bit(altResult[3], altResult[13], altResult[23], 4, buffer); +496 B64.b64from24bit(altResult[24], altResult[4], altResult[14], 4, buffer); +497 B64.b64from24bit(altResult[15], altResult[25], altResult[5], 4, buffer); +498 B64.b64from24bit(altResult[6], altResult[16], altResult[26], 4, buffer); +499 B64.b64from24bit(altResult[27], altResult[7], altResult[17], 4, buffer); +500 B64.b64from24bit(altResult[18], altResult[28], altResult[8], 4, buffer); +501 B64.b64from24bit(altResult[9], altResult[19], altResult[29], 4, buffer); +502 B64.b64from24bit((byte) 0, altResult[31], altResult[30], 3, buffer); +503 } else { +504 B64.b64from24bit(altResult[0], altResult[21], altResult[42], 4, buffer); +505 B64.b64from24bit(altResult[22], altResult[43], altResult[1], 4, buffer); +506 B64.b64from24bit(altResult[44], altResult[2], altResult[23], 4, buffer); +507 B64.b64from24bit(altResult[3], altResult[24], altResult[45], 4, buffer); +508 
B64.b64from24bit(altResult[25], altResult[46], altResult[4], 4, buffer); +509 B64.b64from24bit(altResult[47], altResult[5], altResult[26], 4, buffer); +510 B64.b64from24bit(altResult[6], altResult[27], altResult[48], 4, buffer); +511 B64.b64from24bit(altResult[28], altResult[49], altResult[7], 4, buffer); +512 B64.b64from24bit(altResult[50], altResult[8], altResult[29], 4, buffer); +513 B64.b64from24bit(altResult[9], altResult[30], altResult[51], 4, buffer); +514 B64.b64from24bit(altResult[31], altResult[52], altResult[10], 4, buffer); +515 B64.b64from24bit(altResult[53], altResult[11], altResult[32], 4, buffer); +516 B64.b64from24bit(altResult[12], altResult[33], altResult[54], 4, buffer); +517 B64.b64from24bit(altResult[34], altResult[55], altResult[13], 4, buffer); +518 B64.b64from24bit(altResult[56], altResult[14], altResult[35], 4, buffer); +519 B64.b64from24bit(altResult[15], altResult[36], altResult[57], 4, buffer); +520 B64.b64from24bit(altResult[37], altResult[58], altResult[16], 4, buffer); +521 B64.b64from24bit(altResult[59], altResult[17], altResult[38], 4, buffer); +522 B64.b64from24bit(altResult[18], altResult[39], altResult[60], 4, buffer); +523 B64.b64from24bit(altResult[40], altResult[61], altResult[19], 4, buffer); +524 B64.b64from24bit(altResult[62], altResult[20], altResult[41], 4, buffer); +525 B64.b64from24bit((byte) 0, (byte) 0, altResult[63], 2, buffer); +526 } +527 +528 /* +529 * Clear the buffer for the intermediate result so that people attaching to processes or reading core dumps +530 * cannot get any information. +531 */ +532 // Is there a better way to do this with the JVM? +533 Arrays.fill(tempResult, (byte) 0); +534 Arrays.fill(pBytes, (byte) 0); +535 Arrays.fill(sBytes, (byte) 0); +536 ctx.reset(); +537 altCtx.reset(); +538 Arrays.fill(keyBytes, (byte) 0); +539 Arrays.fill(saltBytes, (byte) 0); +540 +541 return buffer.toString(); +542 } +543 +544 /** +545 * Generates a libc crypt() compatible "$6$" hash value with random salt. 
+546 * <p> +547 * See {@link Crypt#crypt(String, String)} for details. +548 * </p> +549 * <p> +550 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +551 * {@link SecureRandom} to generate your own salts and calling {@link #sha512Crypt(byte[], String)}. +552 * </p> +553 * +554 * @param keyBytes +555 * plaintext to hash +556 * @return complete hash value +557 * @throws IllegalArgumentException +558 * when a {@link java.security.NoSuchAlgorithmException} is caught. +559 */ +560 public static String sha512Crypt(final byte[] keyBytes) { +561 return sha512Crypt(keyBytes, null); +562 } +563 +564 /** +565 * Generates a libc6 crypt() compatible "$6$" hash value. +566 * <p> +567 * See {@link Crypt#crypt(String, String)} for details. +568 * </p> +569 * @param keyBytes +570 * plaintext to hash +571 * @param salt +572 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated +573 * for you using {@link SecureRandom}; if you want to use a {@link Random} object other than +574 * {@link SecureRandom} then we suggest you provide it using +575 * {@link #sha512Crypt(byte[], String, Random)}. +576 * @return complete hash value including salt +577 * @throws IllegalArgumentException +578 * if the salt does not match the allowed pattern +579 * @throws IllegalArgumentException +580 * when a {@link java.security.NoSuchAlgorithmException} is caught. +581 */ +582 public static String sha512Crypt(final byte[] keyBytes, String salt) { +583 if (salt == null) { +584 salt = SHA512_PREFIX + B64.getRandomSalt(8); +585 } +586 return sha2Crypt(keyBytes, salt, SHA512_PREFIX, SHA512_BLOCKSIZE, MessageDigestAlgorithms.SHA_512); +587 } +588 +589 +590 +591 /** +592 * Generates a libc6 crypt() compatible "$6$" hash value. +593 * <p> +594 * See {@link Crypt#crypt(String, String)} for details. 
+595 * </p> +596 * @param keyBytes +597 * plaintext to hash +598 * @param salt +599 * real salt value without prefix or "rounds=". The salt may be null, in which case a salt is generated for +600 * you using {@link ThreadLocalRandom}; for more secure salts consider using {@link SecureRandom} to +601 * generate your own salts. +602 * @param random +603 * the instance of {@link Random} to use for generating the salt. Consider using {@link SecureRandom} +604 * or {@link ThreadLocalRandom}. +605 * @return complete hash value including salt +606 * @throws IllegalArgumentException +607 * if the salt does not match the allowed pattern +608 * @throws IllegalArgumentException +609 * when a {@link java.security.NoSuchAlgorithmException} is caught. +610 */ +611 public static String sha512Crypt(final byte[] keyBytes, String salt, final Random random) { +612 if (salt == null) { +613 salt = SHA512_PREFIX + B64.getRandomSalt(8, random); +614 } +615 return sha2Crypt(keyBytes, salt, SHA512_PREFIX, SHA512_BLOCKSIZE, MessageDigestAlgorithms.SHA_512); +616 } +617} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.digest; +018 +019import java.security.SecureRandom; +020import java.util.concurrent.ThreadLocalRandom; +021 +022import org.apache.commons.codec.Charsets; +023 +024/** +025 * Unix crypt(3) algorithm implementation. +026 * <p> +027 * This class only implements the traditional 56 bit DES based algorithm. Please use DigestUtils.crypt() for a method +028 * that distinguishes between all the algorithms supported in the current glibc's crypt(). +029 * <p> +030 * The Java implementation was taken from the JetSpeed Portal project (see +031 * org.apache.jetspeed.services.security.ldap.UnixCrypt). +032 * <p> +033 * This class is slightly incompatible if the given salt contains characters that are not part of the allowed range +034 * [a-zA-Z0-9./]. +035 * <p> +036 * This class is immutable and thread-safe. 
+037 * +038 * @version $Id$ +039 * @since 1.7 +040 */ +041public class UnixCrypt { +042 +043 private static final int CON_SALT[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +044 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 5, 6, +045 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, +046 34, 35, 36, 37, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, +047 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 0, 0, 0, 0, 0 }; +048 +049 private static final int COV2CHAR[] = { 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 65, 66, 67, 68, 69, 70, +050 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 97, 98, 99, 100, 101, 102, +051 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122 }; +052 +053 private static final char SALT_CHARS[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./" +054 .toCharArray(); +055 +056 private static final boolean SHIFT2[] = { false, false, true, true, true, true, true, true, false, true, true, +057 true, true, true, true, false }; +058 +059 private static final int SKB[][] = { +060 { 0, 16, 0x20000000, 0x20000010, 0x10000, 0x10010, 0x20010000, 0x20010010, 2048, 2064, 0x20000800, +061 0x20000810, 0x10800, 0x10810, 0x20010800, 0x20010810, 32, 48, 0x20000020, 0x20000030, 0x10020, +062 0x10030, 0x20010020, 0x20010030, 2080, 2096, 0x20000820, 0x20000830, 0x10820, 0x10830, 0x20010820, +063 0x20010830, 0x80000, 0x80010, 0x20080000, 0x20080010, 0x90000, 0x90010, 0x20090000, 0x20090010, +064 0x80800, 0x80810, 0x20080800, 0x20080810, 0x90800, 0x90810, 0x20090800, 0x20090810, 0x80020, +065 0x80030, 0x20080020, 0x20080030, 0x90020, 0x90030, 0x20090020, 0x20090030, 0x80820, 0x80830, +066 0x20080820, 0x20080830, 0x90820, 0x90830, 0x20090820, 0x20090830 }, +067 { 0, 0x2000000, 8192, 
0x2002000, 0x200000, 0x2200000, 0x202000, 0x2202000, 4, 0x2000004, 8196, 0x2002004, +068 0x200004, 0x2200004, 0x202004, 0x2202004, 1024, 0x2000400, 9216, 0x2002400, 0x200400, 0x2200400, +069 0x202400, 0x2202400, 1028, 0x2000404, 9220, 0x2002404, 0x200404, 0x2200404, 0x202404, 0x2202404, +070 0x10000000, 0x12000000, 0x10002000, 0x12002000, 0x10200000, 0x12200000, 0x10202000, 0x12202000, +071 0x10000004, 0x12000004, 0x10002004, 0x12002004, 0x10200004, 0x12200004, 0x10202004, 0x12202004, +072 0x10000400, 0x12000400, 0x10002400, 0x12002400, 0x10200400, 0x12200400, 0x10202400, 0x12202400, +073 0x10000404, 0x12000404, 0x10002404, 0x12002404, 0x10200404, 0x12200404, 0x10202404, 0x12202404 }, +074 { 0, 1, 0x40000, 0x40001, 0x1000000, 0x1000001, 0x1040000, 0x1040001, 2, 3, 0x40002, 0x40003, 0x1000002, +075 0x1000003, 0x1040002, 0x1040003, 512, 513, 0x40200, 0x40201, 0x1000200, 0x1000201, 0x1040200, +076 0x1040201, 514, 515, 0x40202, 0x40203, 0x1000202, 0x1000203, 0x1040202, 0x1040203, 0x8000000, +077 0x8000001, 0x8040000, 0x8040001, 0x9000000, 0x9000001, 0x9040000, 0x9040001, 0x8000002, 0x8000003, +078 0x8040002, 0x8040003, 0x9000002, 0x9000003, 0x9040002, 0x9040003, 0x8000200, 0x8000201, 0x8040200, +079 0x8040201, 0x9000200, 0x9000201, 0x9040200, 0x9040201, 0x8000202, 0x8000203, 0x8040202, 0x8040203, +080 0x9000202, 0x9000203, 0x9040202, 0x9040203 }, +081 { 0, 0x100000, 256, 0x100100, 8, 0x100008, 264, 0x100108, 4096, 0x101000, 4352, 0x101100, 4104, 0x101008, +082 4360, 0x101108, 0x4000000, 0x4100000, 0x4000100, 0x4100100, 0x4000008, 0x4100008, 0x4000108, +083 0x4100108, 0x4001000, 0x4101000, 0x4001100, 0x4101100, 0x4001008, 0x4101008, 0x4001108, 0x4101108, +084 0x20000, 0x120000, 0x20100, 0x120100, 0x20008, 0x120008, 0x20108, 0x120108, 0x21000, 0x121000, +085 0x21100, 0x121100, 0x21008, 0x121008, 0x21108, 0x121108, 0x4020000, 0x4120000, 0x4020100, +086 0x4120100, 0x4020008, 0x4120008, 0x4020108, 0x4120108, 0x4021000, 0x4121000, 0x4021100, 0x4121100, +087 0x4021008, 
0x4121008, 0x4021108, 0x4121108 }, +088 { 0, 0x10000000, 0x10000, 0x10010000, 4, 0x10000004, 0x10004, 0x10010004, 0x20000000, 0x30000000, +089 0x20010000, 0x30010000, 0x20000004, 0x30000004, 0x20010004, 0x30010004, 0x100000, 0x10100000, +090 0x110000, 0x10110000, 0x100004, 0x10100004, 0x110004, 0x10110004, 0x20100000, 0x30100000, +091 0x20110000, 0x30110000, 0x20100004, 0x30100004, 0x20110004, 0x30110004, 4096, 0x10001000, 0x11000, +092 0x10011000, 4100, 0x10001004, 0x11004, 0x10011004, 0x20001000, 0x30001000, 0x20011000, 0x30011000, +093 0x20001004, 0x30001004, 0x20011004, 0x30011004, 0x101000, 0x10101000, 0x111000, 0x10111000, +094 0x101004, 0x10101004, 0x111004, 0x10111004, 0x20101000, 0x30101000, 0x20111000, 0x30111000, +095 0x20101004, 0x30101004, 0x20111004, 0x30111004 }, +096 { 0, 0x8000000, 8, 0x8000008, 1024, 0x8000400, 1032, 0x8000408, 0x20000, 0x8020000, 0x20008, 0x8020008, +097 0x20400, 0x8020400, 0x20408, 0x8020408, 1, 0x8000001, 9, 0x8000009, 1025, 0x8000401, 1033, +098 0x8000409, 0x20001, 0x8020001, 0x20009, 0x8020009, 0x20401, 0x8020401, 0x20409, 0x8020409, +099 0x2000000, 0xa000000, 0x2000008, 0xa000008, 0x2000400, 0xa000400, 0x2000408, 0xa000408, 0x2020000, +100 0xa020000, 0x2020008, 0xa020008, 0x2020400, 0xa020400, 0x2020408, 0xa020408, 0x2000001, 0xa000001, +101 0x2000009, 0xa000009, 0x2000401, 0xa000401, 0x2000409, 0xa000409, 0x2020001, 0xa020001, 0x2020009, +102 0xa020009, 0x2020401, 0xa020401, 0x2020409, 0xa020409 }, +103 { 0, 256, 0x80000, 0x80100, 0x1000000, 0x1000100, 0x1080000, 0x1080100, 16, 272, 0x80010, 0x80110, +104 0x1000010, 0x1000110, 0x1080010, 0x1080110, 0x200000, 0x200100, 0x280000, 0x280100, 0x1200000, +105 0x1200100, 0x1280000, 0x1280100, 0x200010, 0x200110, 0x280010, 0x280110, 0x1200010, 0x1200110, +106 0x1280010, 0x1280110, 512, 768, 0x80200, 0x80300, 0x1000200, 0x1000300, 0x1080200, 0x1080300, 528, +107 784, 0x80210, 0x80310, 0x1000210, 0x1000310, 0x1080210, 0x1080310, 0x200200, 0x200300, 0x280200, +108 0x280300, 0x1200200, 
0x1200300, 0x1280200, 0x1280300, 0x200210, 0x200310, 0x280210, 0x280310, +109 0x1200210, 0x1200310, 0x1280210, 0x1280310 }, +110 { 0, 0x4000000, 0x40000, 0x4040000, 2, 0x4000002, 0x40002, 0x4040002, 8192, 0x4002000, 0x42000, 0x4042000, +111 8194, 0x4002002, 0x42002, 0x4042002, 32, 0x4000020, 0x40020, 0x4040020, 34, 0x4000022, 0x40022, +112 0x4040022, 8224, 0x4002020, 0x42020, 0x4042020, 8226, 0x4002022, 0x42022, 0x4042022, 2048, +113 0x4000800, 0x40800, 0x4040800, 2050, 0x4000802, 0x40802, 0x4040802, 10240, 0x4002800, 0x42800, +114 0x4042800, 10242, 0x4002802, 0x42802, 0x4042802, 2080, 0x4000820, 0x40820, 0x4040820, 2082, +115 0x4000822, 0x40822, 0x4040822, 10272, 0x4002820, 0x42820, 0x4042820, 10274, 0x4002822, 0x42822, +116 0x4042822 } }; +117 +118 private static final int SPTRANS[][] = { +119 { 0x820200, 0x20000, 0x80800000, 0x80820200, 0x800000, 0x80020200, 0x80020000, 0x80800000, 0x80020200, +120 0x820200, 0x820000, 0x80000200, 0x80800200, 0x800000, 0, 0x80020000, 0x20000, 0x80000000, +121 0x800200, 0x20200, 0x80820200, 0x820000, 0x80000200, 0x800200, 0x80000000, 512, 0x20200, +122 0x80820000, 512, 0x80800200, 0x80820000, 0, 0, 0x80820200, 0x800200, 0x80020000, 0x820200, +123 0x20000, 0x80000200, 0x800200, 0x80820000, 512, 0x20200, 0x80800000, 0x80020200, 0x80000000, +124 0x80800000, 0x820000, 0x80820200, 0x20200, 0x820000, 0x80800200, 0x800000, 0x80000200, 0x80020000, +125 0, 0x20000, 0x800000, 0x80800200, 0x820200, 0x80000000, 0x80820000, 512, 0x80020200 }, +126 { 0x10042004, 0, 0x42000, 0x10040000, 0x10000004, 8196, 0x10002000, 0x42000, 8192, 0x10040004, 4, +127 0x10002000, 0x40004, 0x10042000, 0x10040000, 4, 0x40000, 0x10002004, 0x10040004, 8192, 0x42004, +128 0x10000000, 0, 0x40004, 0x10002004, 0x42004, 0x10042000, 0x10000004, 0x10000000, 0x40000, 8196, +129 0x10042004, 0x40004, 0x10042000, 0x10002000, 0x42004, 0x10042004, 0x40004, 0x10000004, 0, +130 0x10000000, 8196, 0x40000, 0x10040004, 8192, 0x10000000, 0x42004, 0x10002004, 0x10042000, 8192, 0, +131 
0x10000004, 4, 0x10042004, 0x42000, 0x10040000, 0x10040004, 0x40000, 8196, 0x10002000, 0x10002004, +132 4, 0x10040000, 0x42000 }, +133 { 0x41000000, 0x1010040, 64, 0x41000040, 0x40010000, 0x1000000, 0x41000040, 0x10040, 0x1000040, 0x10000, +134 0x1010000, 0x40000000, 0x41010040, 0x40000040, 0x40000000, 0x41010000, 0, 0x40010000, 0x1010040, +135 64, 0x40000040, 0x41010040, 0x10000, 0x41000000, 0x41010000, 0x1000040, 0x40010040, 0x1010000, +136 0x10040, 0, 0x1000000, 0x40010040, 0x1010040, 64, 0x40000000, 0x10000, 0x40000040, 0x40010000, +137 0x1010000, 0x41000040, 0, 0x1010040, 0x10040, 0x41010000, 0x40010000, 0x1000000, 0x41010040, +138 0x40000000, 0x40010040, 0x41000000, 0x1000000, 0x41010040, 0x10000, 0x1000040, 0x41000040, +139 0x10040, 0x1000040, 0, 0x41010000, 0x40000040, 0x41000000, 0x40010040, 64, 0x1010000 }, +140 { 0x100402, 0x4000400, 2, 0x4100402, 0, 0x4100000, 0x4000402, 0x100002, 0x4100400, 0x4000002, 0x4000000, +141 1026, 0x4000002, 0x100402, 0x100000, 0x4000000, 0x4100002, 0x100400, 1024, 2, 0x100400, 0x4000402, +142 0x4100000, 1024, 1026, 0, 0x100002, 0x4100400, 0x4000400, 0x4100002, 0x4100402, 0x100000, +143 0x4100002, 1026, 0x100000, 0x4000002, 0x100400, 0x4000400, 2, 0x4100000, 0x4000402, 0, 1024, +144 0x100002, 0, 0x4100002, 0x4100400, 1024, 0x4000000, 0x4100402, 0x100402, 0x100000, 0x4100402, 2, +145 0x4000400, 0x100402, 0x100002, 0x100400, 0x4100000, 0x4000402, 1026, 0x4000000, 0x4000002, +146 0x4100400 }, +147 { 0x2000000, 16384, 256, 0x2004108, 0x2004008, 0x2000100, 16648, 0x2004000, 16384, 8, 0x2000008, 16640, +148 0x2000108, 0x2004008, 0x2004100, 0, 16640, 0x2000000, 16392, 264, 0x2000100, 16648, 0, 0x2000008, +149 8, 0x2000108, 0x2004108, 16392, 0x2004000, 256, 264, 0x2004100, 0x2004100, 0x2000108, 16392, +150 0x2004000, 16384, 8, 0x2000008, 0x2000100, 0x2000000, 16640, 0x2004108, 0, 16648, 0x2000000, 256, +151 16392, 0x2000108, 256, 0, 0x2004108, 0x2004008, 0x2004100, 264, 16384, 16640, 0x2004008, +152 0x2000100, 264, 8, 16648, 
0x2004000, 0x2000008 }, +153 { 0x20000010, 0x80010, 0, 0x20080800, 0x80010, 2048, 0x20000810, 0x80000, 2064, 0x20080810, 0x80800, +154 0x20000000, 0x20000800, 0x20000010, 0x20080000, 0x80810, 0x80000, 0x20000810, 0x20080010, 0, 2048, +155 16, 0x20080800, 0x20080010, 0x20080810, 0x20080000, 0x20000000, 2064, 16, 0x80800, 0x80810, +156 0x20000800, 2064, 0x20000000, 0x20000800, 0x80810, 0x20080800, 0x80010, 0, 0x20000800, 0x20000000, +157 2048, 0x20080010, 0x80000, 0x80010, 0x20080810, 0x80800, 16, 0x20080810, 0x80800, 0x80000, +158 0x20000810, 0x20000010, 0x20080000, 0x80810, 0, 2048, 0x20000010, 0x20000810, 0x20080800, +159 0x20080000, 2064, 16, 0x20080010 }, +160 { 4096, 128, 0x400080, 0x400001, 0x401081, 4097, 4224, 0, 0x400000, 0x400081, 129, 0x401000, 1, 0x401080, +161 0x401000, 129, 0x400081, 4096, 4097, 0x401081, 0, 0x400080, 0x400001, 4224, 0x401001, 4225, +162 0x401080, 1, 4225, 0x401001, 128, 0x400000, 4225, 0x401000, 0x401001, 129, 4096, 128, 0x400000, +163 0x401001, 0x400081, 4225, 4224, 0, 128, 0x400001, 1, 0x400080, 0, 0x400081, 0x400080, 4224, 129, +164 4096, 0x401081, 0x400000, 0x401080, 1, 4097, 0x401081, 0x400001, 0x401080, 0x401000, 4097 }, +165 { 0x8200020, 0x8208000, 32800, 0, 0x8008000, 0x200020, 0x8200000, 0x8208020, 32, 0x8000000, 0x208000, +166 32800, 0x208020, 0x8008020, 0x8000020, 0x8200000, 32768, 0x208020, 0x200020, 0x8008000, 0x8208020, +167 0x8000020, 0, 0x208000, 0x8000000, 0x200000, 0x8008020, 0x8200020, 0x200000, 32768, 0x8208000, 32, +168 0x200000, 32768, 0x8000020, 0x8208020, 32800, 0x8000000, 0, 0x208000, 0x8200020, 0x8008020, +169 0x8008000, 0x200020, 0x8208000, 32, 0x200020, 0x8008000, 0x8208020, 0x200000, 0x8200000, +170 0x8000020, 0x208000, 32800, 0x8008020, 0x8200000, 32, 0x8208000, 0x208020, 0, 0x8000000, +171 0x8200020, 32768, 0x208020 } }; +172 +173 /** +174 * Generates a crypt(3) compatible hash using the DES algorithm. 
+175 * <p> +176 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +177 * {@link SecureRandom} to generate your own salts and calling {@link #crypt(byte[], String)}. +178 * </p> +179 * +180 * @param original +181 * plaintext password +182 * @return a 13 character string starting with the salt string +183 */ +184 public static String crypt(final byte[] original) { +185 return crypt(original, null); +186 } +187 +188 /** +189 * Generates a crypt(3) compatible hash using the DES algorithm. +190 * <p> +191 * Using unspecified characters as salt results incompatible hash values. +192 * </p> +193 * +194 * @param original +195 * plaintext password +196 * @param salt +197 * a two character string drawn from [a-zA-Z0-9./]. The salt may be null, in which case a salt is +198 * generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +199 * {@link SecureRandom} to generate your own salts. +200 * @return a 13 character string starting with the salt string +201 * @throws IllegalArgumentException +202 * if the salt does not match the allowed pattern +203 */ +204 public static String crypt(final byte[] original, String salt) { +205 if (salt == null) { +206 final ThreadLocalRandom randomGenerator = ThreadLocalRandom.current(); +207 final int numSaltChars = SALT_CHARS.length; +208 salt = "" + SALT_CHARS[randomGenerator.nextInt(numSaltChars)] + +209 SALT_CHARS[randomGenerator.nextInt(numSaltChars)]; +210 } else if (!salt.matches("^[" + B64.B64T + "]{2,}$")) { +211 throw new IllegalArgumentException("Invalid salt value: " + salt); +212 } +213 +214 final StringBuilder buffer = new StringBuilder(" "); +215 final char charZero = salt.charAt(0); +216 final char charOne = salt.charAt(1); +217 buffer.setCharAt(0, charZero); +218 buffer.setCharAt(1, charOne); +219 final int eSwap0 = CON_SALT[charZero]; +220 final int eSwap1 = CON_SALT[charOne] << 4; +221 final byte key[] = new byte[8]; +222 for (int i = 0; i < 
key.length; i++) { +223 key[i] = 0; +224 } +225 +226 for (int i = 0; i < key.length && i < original.length; i++) { +227 final int iChar = original[i]; +228 key[i] = (byte) (iChar << 1); +229 } +230 +231 final int schedule[] = desSetKey(key); +232 final int out[] = body(schedule, eSwap0, eSwap1); +233 final byte b[] = new byte[9]; +234 intToFourBytes(out[0], b, 0); +235 intToFourBytes(out[1], b, 4); +236 b[8] = 0; +237 int i = 2; +238 int y = 0; +239 int u = 128; +240 for (; i < 13; i++) { +241 int j = 0; +242 int c = 0; +243 for (; j < 6; j++) { +244 c <<= 1; +245 if ((b[y] & u) != 0) { +246 c |= 0x1; +247 } +248 u >>>= 1; +249 if (u == 0) { +250 y++; +251 u = 128; +252 } +253 buffer.setCharAt(i, (char) COV2CHAR[c]); +254 } +255 } +256 return buffer.toString(); +257 } +258 +259 /** +260 * Generates a crypt(3) compatible hash using the DES algorithm. +261 * <p> +262 * A salt is generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +263 * {@link SecureRandom} to generate your own salts and calling {@link #crypt(String, String)}. +264 * </p> +265 * +266 * @param original +267 * plaintext password +268 * @return a 13 character string starting with the salt string +269 */ +270 public static String crypt(final String original) { +271 return crypt(original.getBytes(Charsets.UTF_8)); +272 } +273 +274 /** +275 * Generates a crypt(3) compatible hash using the DES algorithm. +276 * +277 * @param original +278 * plaintext password +279 * @param salt +280 * a two character string drawn from [a-zA-Z0-9./]. The salt may be null, in which case a salt is +281 * generated for you using {@link ThreadLocalRandom}; for more secure salts consider using +282 * {@link SecureRandom} to generate your own salts. 
+283 * @return a 13 character string starting with the salt string +284 * @throws IllegalArgumentException +285 * if the salt does not match the allowed pattern +286 */ +287 public static String crypt(final String original, final String salt) { +288 return crypt(original.getBytes(Charsets.UTF_8), salt); +289 } +290 +291 private static int[] body(final int schedule[], final int eSwap0, final int eSwap1) { +292 int left = 0; +293 int right = 0; +294 int t = 0; +295 for (int j = 0; j < 25; j++) { +296 for (int i = 0; i < 32; i += 4) { +297 left = dEncrypt(left, right, i, eSwap0, eSwap1, schedule); +298 right = dEncrypt(right, left, i + 2, eSwap0, eSwap1, schedule); +299 } +300 t = left; +301 left = right; +302 right = t; +303 } +304 +305 t = right; +306 right = left >>> 1 | left << 31; +307 left = t >>> 1 | t << 31; +308 final int results[] = new int[2]; +309 permOp(right, left, 1, 0x55555555, results); +310 right = results[0]; +311 left = results[1]; +312 permOp(left, right, 8, 0xff00ff, results); +313 left = results[0]; +314 right = results[1]; +315 permOp(right, left, 2, 0x33333333, results); +316 right = results[0]; +317 left = results[1]; +318 permOp(left, right, 16, 65535, results); +319 left = results[0]; +320 right = results[1]; +321 permOp(right, left, 4, 0xf0f0f0f, results); +322 right = results[0]; +323 left = results[1]; +324 final int out[] = new int[2]; +325 out[0] = left; +326 out[1] = right; +327 return out; +328 } +329 +330 private static int byteToUnsigned(final byte b) { +331 final int value = b; +332 return value < 0 ? 
value + 256 : value; +333 } +334 +335 private static int dEncrypt(int el, final int r, final int s, final int e0, final int e1, final int sArr[]) { +336 int v = r ^ r >>> 16; +337 int u = v & e0; +338 v &= e1; +339 u = u ^ u << 16 ^ r ^ sArr[s]; +340 int t = v ^ v << 16 ^ r ^ sArr[s + 1]; +341 t = t >>> 4 | t << 28; +342 el ^= SPTRANS[1][t & 0x3f] | SPTRANS[3][t >>> 8 & 0x3f] | SPTRANS[5][t >>> 16 & 0x3f] | +343 SPTRANS[7][t >>> 24 & 0x3f] | SPTRANS[0][u & 0x3f] | SPTRANS[2][u >>> 8 & 0x3f] | +344 SPTRANS[4][u >>> 16 & 0x3f] | SPTRANS[6][u >>> 24 & 0x3f]; +345 return el; +346 } +347 +348 private static int[] desSetKey(final byte key[]) { +349 final int schedule[] = new int[32]; +350 int c = fourBytesToInt(key, 0); +351 int d = fourBytesToInt(key, 4); +352 final int results[] = new int[2]; +353 permOp(d, c, 4, 0xf0f0f0f, results); +354 d = results[0]; +355 c = results[1]; +356 c = hPermOp(c, -2, 0xcccc0000); +357 d = hPermOp(d, -2, 0xcccc0000); +358 permOp(d, c, 1, 0x55555555, results); +359 d = results[0]; +360 c = results[1]; +361 permOp(c, d, 8, 0xff00ff, results); +362 c = results[0]; +363 d = results[1]; +364 permOp(d, c, 1, 0x55555555, results); +365 d = results[0]; +366 c = results[1]; +367 d = (d & 0xff) << 16 | d & 0xff00 | (d & 0xff0000) >>> 16 | (c & 0xf0000000) >>> 4; +368 c &= 0xfffffff; +369 int j = 0; +370 for (int i = 0; i < 16; i++) { +371 if (SHIFT2[i]) { +372 c = c >>> 2 | c << 26; +373 d = d >>> 2 | d << 26; +374 } else { +375 c = c >>> 1 | c << 27; +376 d = d >>> 1 | d << 27; +377 } +378 c &= 0xfffffff; +379 d &= 0xfffffff; +380 int s = SKB[0][c & 0x3f] | SKB[1][c >>> 6 & 0x3 | c >>> 7 & 0x3c] | +381 SKB[2][c >>> 13 & 0xf | c >>> 14 & 0x30] | +382 SKB[3][c >>> 20 & 0x1 | c >>> 21 & 0x6 | c >>> 22 & 0x38]; +383 final int t = SKB[4][d & 0x3f] | SKB[5][d >>> 7 & 0x3 | d >>> 8 & 0x3c] | SKB[6][d >>> 15 & 0x3f] | +384 SKB[7][d >>> 21 & 0xf | d >>> 22 & 0x30]; +385 schedule[j++] = (t << 16 | s & 0xffff); +386 s = s >>> 16 | t & 0xffff0000; +387 s = s 
<< 4 | s >>> 28; +388 schedule[j++] = s; +389 } +390 +391 return schedule; +392 } +393 +394 private static int fourBytesToInt(final byte b[], int offset) { +395 int value = byteToUnsigned(b[offset++]); +396 value |= byteToUnsigned(b[offset++]) << 8; +397 value |= byteToUnsigned(b[offset++]) << 16; +398 value |= byteToUnsigned(b[offset++]) << 24; +399 return value; +400 } +401 +402 private static int hPermOp(int a, final int n, final int m) { +403 final int t = (a << 16 - n ^ a) & m; +404 a = a ^ t ^ t >>> 16 - n; +405 return a; +406 } +407 +408 private static void intToFourBytes(final int iValue, final byte b[], int offset) { +409 b[offset++] = (byte) (iValue & 0xff); +410 b[offset++] = (byte) (iValue >>> 8 & 0xff); +411 b[offset++] = (byte) (iValue >>> 16 & 0xff); +412 b[offset++] = (byte) (iValue >>> 24 & 0xff); +413 } +414 +415 private static void permOp(int a, int b, final int n, final int m, final int results[]) { +416 final int t = (a >>> n ^ b) & m; +417 a ^= t << n; +418 b ^= t; +419 results[0] = a; +420 results[1] = b; +421 } +422 +423} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.digest; +019 +020import static java.lang.Integer.rotateLeft; +021 +022import java.util.zip.Checksum; +023 +024/** +025 * Implementation of the xxhash32 hash algorithm. 
+026 * +027 * <p>Copied from Commons Compress 1.14 +028 * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p> +029 * <p>NotThreadSafe</p> +030 * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a> +031 * @since 1.11 +032 */ +033public class XXHash32 implements Checksum { +034 +035 private static final int BUF_SIZE = 16; +036 private static final int ROTATE_BITS = 13; +037 +038 private static final int PRIME1 = (int) 2654435761l; +039 private static final int PRIME2 = (int) 2246822519l; +040 private static final int PRIME3 = (int) 3266489917l; +041 private static final int PRIME4 = 668265263; +042 private static final int PRIME5 = 374761393; +043 +044 private final byte[] oneByte = new byte[1]; +045 private final int[] state = new int[4]; +046 // Note: the code used to use ByteBuffer but the manual method is 50% faster +047 // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c +048 private final byte[] buffer = new byte[BUF_SIZE]; +049 private final int seed; +050 +051 private int totalLen; +052 private int pos; +053 +054 /** +055 * Creates an XXHash32 instance with a seed of 0. +056 */ +057 public XXHash32() { +058 this(0); +059 } +060 +061 /** +062 * Creates an XXHash32 instance. 
+063 * @param seed the seed to use +064 */ +065 public XXHash32(final int seed) { +066 this.seed = seed; +067 initializeState(); +068 } +069 +070 @Override +071 public void reset() { +072 initializeState(); +073 totalLen = 0; +074 pos = 0; +075 } +076 +077 @Override +078 public void update(final int b) { +079 oneByte[0] = (byte) (b & 0xff); +080 update(oneByte, 0, 1); +081 } +082 +083 @Override +084 public void update(final byte[] b, int off, final int len) { +085 if (len <= 0) { +086 return; +087 } +088 totalLen += len; +089 +090 final int end = off + len; +091 +092 if (pos + len < BUF_SIZE) { +093 System.arraycopy(b, off, buffer, pos, len); +094 pos += len; +095 return; +096 } +097 +098 if (pos > 0) { +099 final int size = BUF_SIZE - pos; +100 System.arraycopy(b, off, buffer, pos, size); +101 process(buffer, 0); +102 off += size; +103 } +104 +105 final int limit = end - BUF_SIZE; +106 while (off <= limit) { +107 process(b, off); +108 off += BUF_SIZE; +109 } +110 +111 if (off < end) { +112 pos = end - off; +113 System.arraycopy(b, off, buffer, 0, pos); +114 } +115 } +116 +117 @Override +118 public long getValue() { +119 int hash; +120 if (totalLen > BUF_SIZE) { +121 hash = +122 rotateLeft(state[0], 1) + +123 rotateLeft(state[1], 7) + +124 rotateLeft(state[2], 12) + +125 rotateLeft(state[3], 18); +126 } else { +127 hash = state[2] + PRIME5; +128 } +129 hash += totalLen; +130 +131 int idx = 0; +132 final int limit = pos - 4; +133 for (; idx <= limit; idx += 4) { +134 hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4; +135 } +136 while (idx < pos) { +137 hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1; +138 } +139 +140 hash ^= hash >>> 15; +141 hash *= PRIME2; +142 hash ^= hash >>> 13; +143 hash *= PRIME3; +144 hash ^= hash >>> 16; +145 return hash & 0xffffffffl; +146 } +147 +148 private static int getInt(final byte[] buffer, final int idx) { +149 return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl); +150 } +151 +152 
private void initializeState() { +153 state[0] = seed + PRIME1 + PRIME2; +154 state[1] = seed + PRIME2; +155 state[2] = seed; +156 state[3] = seed - PRIME1; +157 } +158 +159 private void process(final byte[] b, final int offset) { +160 // local shadows for performance +161 int s0 = state[0]; +162 int s1 = state[1]; +163 int s2 = state[2]; +164 int s3 = state[3]; +165 +166 s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1; +167 s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1; +168 s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1; +169 s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1; +170 +171 state[0] = s0; +172 state[1] = s1; +173 state[2] = s2; +174 state[3] = s3; +175 +176 pos = 0; +177 } +178 +179 /** +180 * Reads the given byte array as a little endian long. +181 * @param bytes the byte array to convert +182 * @param off the offset into the array that starts the value +183 * @param length the number of bytes representing the value +184 * @return the number read +185 * @throws IllegalArgumentException if len is bigger than eight +186 */ +187 private static long fromLittleEndian(final byte[] bytes, final int off, final int length) { +188 if (length > 8) { +189 throw new IllegalArgumentException("can't read more than eight bytes into a long value"); +190 } +191 long l = 0; +192 for (int i = 0; i < length; i++) { +193 l |= (bytes[off + i] & 0xffl) << (8 * i); +194 } +195 return l; +196 } +197} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes a string into a Caverphone value. +025 * +026 * This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 +027 * algorithm: +028 * +029 * <p>This class is immutable and thread-safe.</p> +030 * +031 * @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ +032 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> +033 * @since 1.5 +034 */ +035public abstract class AbstractCaverphone implements StringEncoder { +036 +037 /** +038 * Creates an instance of the Caverphone encoder +039 */ +040 public AbstractCaverphone() { +041 super(); +042 } +043 +044 /** +045 * Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of +046 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. 
+047 * +048 * @param source +049 * Object to encode +050 * @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String +051 * supplied. +052 * @throws EncoderException +053 * if the parameter supplied is not of type java.lang.String +054 */ +055 @Override +056 public Object encode(final Object source) throws EncoderException { +057 if (!(source instanceof String)) { +058 throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String"); +059 } +060 return this.encode((String) source); +061 } +062 +063 /** +064 * Tests if the encodings of two strings are equal. +065 * +066 * This method might be promoted to a new AbstractStringEncoder superclass. +067 * +068 * @param str1 +069 * First of two strings to compare +070 * @param str2 +071 * Second of two strings to compare +072 * @return <code>true</code> if the encodings of these strings are identical, <code>false</code> otherwise. +073 * @throws EncoderException +074 * thrown if there is an error condition during the encoding process. +075 */ +076 public boolean isEncodeEqual(final String str1, final String str2) throws EncoderException { +077 return this.encode(str1).equals(this.encode(str2)); +078 } +079 +080} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes a string into a Caverphone 2.0 value. Delegate to a {@link Caverphone2} instance. +025 * +026 * This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 +027 * algorithm: +028 * +029 * @version $Id: Caverphone.java 1079535 2011-03-08 20:54:37Z ggregory $ +030 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> +031 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a> +032 * @since 1.4 +033 * @deprecated 1.5 Replaced by {@link Caverphone2}, will be removed in 2.0. +034 */ +035@Deprecated +036public class Caverphone implements StringEncoder { +037 +038 /** +039 * Delegate to a {@link Caverphone2} instance to avoid code duplication. 
+040 */ +041 final private Caverphone2 encoder = new Caverphone2(); +042 +043 /** +044 * Creates an instance of the Caverphone encoder +045 */ +046 public Caverphone() { +047 super(); +048 } +049 +050 /** +051 * Encodes the given String into a Caverphone value. +052 * +053 * @param source +054 * String the source string +055 * @return A caverphone code for the given String +056 */ +057 public String caverphone(final String source) { +058 return this.encoder.encode(source); +059 } +060 +061 /** +062 * Encodes an Object using the caverphone algorithm. This method is provided in order to satisfy the requirements of +063 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. +064 * +065 * @param obj +066 * Object to encode +067 * @return An object (or type java.lang.String) containing the caverphone code which corresponds to the String +068 * supplied. +069 * @throws EncoderException +070 * if the parameter supplied is not of type java.lang.String +071 */ +072 @Override +073 public Object encode(final Object obj) throws EncoderException { +074 if (!(obj instanceof String)) { +075 throw new EncoderException("Parameter supplied to Caverphone encode is not of type java.lang.String"); +076 } +077 return this.caverphone((String) obj); +078 } +079 +080 /** +081 * Encodes a String using the Caverphone algorithm. +082 * +083 * @param str +084 * String object to encode +085 * @return The caverphone code corresponding to the String supplied +086 */ +087 @Override +088 public String encode(final String str) { +089 return this.caverphone(str); +090 } +091 +092 /** +093 * Tests if the caverphones of two strings are identical. +094 * +095 * @param str1 +096 * First of two strings to compare +097 * @param str2 +098 * Second of two strings to compare +099 * @return <code>true</code> if the caverphones of these strings are identical, <code>false</code> otherwise. 
+100 */ +101 public boolean isCaverphoneEqual(final String str1, final String str2) { +102 return this.caverphone(str1).equals(this.caverphone(str2)); +103 } +104 +105} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020/** +021 * Encodes a string into a Caverphone 1.0 value. +022 * +023 * This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 1.0 +024 * algorithm: +025 * +026 * @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ +027 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> +028 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp060902.pdf">Caverphone 1.0 specification</a> +029 * @since 1.5 +030 * +031 * <p>This class is immutable and thread-safe.</p> +032 */ +033public class Caverphone1 extends AbstractCaverphone { +034 +035 private static final String SIX_1 = "111111"; +036 +037 /** +038 * Encodes the given String into a Caverphone value. +039 * +040 * @param source +041 * String the source string +042 * @return A caverphone code for the given String +043 */ +044 @Override +045 public String encode(final String source) { +046 String txt = source; +047 if (txt == null || txt.length() == 0) { +048 return SIX_1; +049 } +050 +051 // 1. 
Convert to lowercase +052 txt = txt.toLowerCase(java.util.Locale.ENGLISH); +053 +054 // 2. Remove anything not A-Z +055 txt = txt.replaceAll("[^a-z]", ""); +056 +057 // 3. Handle various start options +058 // 2 is a temporary placeholder to indicate a consonant which we are no longer interested in. +059 txt = txt.replaceAll("^cough", "cou2f"); +060 txt = txt.replaceAll("^rough", "rou2f"); +061 txt = txt.replaceAll("^tough", "tou2f"); +062 txt = txt.replaceAll("^enough", "enou2f"); +063 txt = txt.replaceAll("^gn", "2n"); +064 +065 // End +066 txt = txt.replaceAll("mb$", "m2"); +067 +068 // 4. Handle replacements +069 txt = txt.replaceAll("cq", "2q"); +070 txt = txt.replaceAll("ci", "si"); +071 txt = txt.replaceAll("ce", "se"); +072 txt = txt.replaceAll("cy", "sy"); +073 txt = txt.replaceAll("tch", "2ch"); +074 txt = txt.replaceAll("c", "k"); +075 txt = txt.replaceAll("q", "k"); +076 txt = txt.replaceAll("x", "k"); +077 txt = txt.replaceAll("v", "f"); +078 txt = txt.replaceAll("dg", "2g"); +079 txt = txt.replaceAll("tio", "sio"); +080 txt = txt.replaceAll("tia", "sia"); +081 txt = txt.replaceAll("d", "t"); +082 txt = txt.replaceAll("ph", "fh"); +083 txt = txt.replaceAll("b", "p"); +084 txt = txt.replaceAll("sh", "s2"); +085 txt = txt.replaceAll("z", "s"); +086 txt = txt.replaceAll("^[aeiou]", "A"); +087 // 3 is a temporary placeholder marking a vowel +088 txt = txt.replaceAll("[aeiou]", "3"); +089 txt = txt.replaceAll("3gh3", "3kh3"); +090 txt = txt.replaceAll("gh", "22"); +091 txt = txt.replaceAll("g", "k"); +092 txt = txt.replaceAll("s+", "S"); +093 txt = txt.replaceAll("t+", "T"); +094 txt = txt.replaceAll("p+", "P"); +095 txt = txt.replaceAll("k+", "K"); +096 txt = txt.replaceAll("f+", "F"); +097 txt = txt.replaceAll("m+", "M"); +098 txt = txt.replaceAll("n+", "N"); +099 txt = txt.replaceAll("w3", "W3"); +100 txt = txt.replaceAll("wy", "Wy"); // 1.0 only +101 txt = txt.replaceAll("wh3", "Wh3"); +102 txt = txt.replaceAll("why", "Why"); // 1.0 only +103 txt = 
txt.replaceAll("w", "2"); +104 txt = txt.replaceAll("^h", "A"); +105 txt = txt.replaceAll("h", "2"); +106 txt = txt.replaceAll("r3", "R3"); +107 txt = txt.replaceAll("ry", "Ry"); // 1.0 only +108 txt = txt.replaceAll("r", "2"); +109 txt = txt.replaceAll("l3", "L3"); +110 txt = txt.replaceAll("ly", "Ly"); // 1.0 only +111 txt = txt.replaceAll("l", "2"); +112 txt = txt.replaceAll("j", "y"); // 1.0 only +113 txt = txt.replaceAll("y3", "Y3"); // 1.0 only +114 txt = txt.replaceAll("y", "2"); // 1.0 only +115 +116 // 5. Handle removals +117 txt = txt.replaceAll("2", ""); +118 txt = txt.replaceAll("3", ""); +119 +120 // 6. put six 1s on the end +121 txt = txt + SIX_1; +122 +123 // 7. take the first six characters as the code +124 return txt.substring(0, SIX_1.length()); +125 } +126 +127} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020/** +021 * Encodes a string into a Caverphone 2.0 value. +022 * +023 * This is an algorithm created by the Caversham Project at the University of Otago. It implements the Caverphone 2.0 +024 * algorithm: +025 * +026 * @version $Id: Caverphone.java 1075947 2011-03-01 17:56:14Z ggregory $ +027 * @see <a href="http://en.wikipedia.org/wiki/Caverphone">Wikipedia - Caverphone</a> +028 * @see <a href="http://caversham.otago.ac.nz/files/working/ctp150804.pdf">Caverphone 2.0 specification</a> +029 * @since 1.5 +030 * +031 * <p>This class is immutable and thread-safe.</p> +032 */ +033public class Caverphone2 extends AbstractCaverphone { +034 +035 private static final String TEN_1 = "1111111111"; +036 +037 /** +038 * Encodes the given String into a Caverphone 2.0 value. +039 * +040 * @param source +041 * String the source string +042 * @return A caverphone code for the given String +043 */ +044 @Override +045 public String encode(final String source) { +046 String txt = source; +047 if (txt == null || txt.length() == 0) { +048 return TEN_1; +049 } +050 +051 // 1. 
Convert to lowercase +052 txt = txt.toLowerCase(java.util.Locale.ENGLISH); +053 +054 // 2. Remove anything not A-Z +055 txt = txt.replaceAll("[^a-z]", ""); +056 +057 // 2.5. Remove final e +058 txt = txt.replaceAll("e$", ""); // 2.0 only +059 +060 // 3. Handle various start options +061 txt = txt.replaceAll("^cough", "cou2f"); +062 txt = txt.replaceAll("^rough", "rou2f"); +063 txt = txt.replaceAll("^tough", "tou2f"); +064 txt = txt.replaceAll("^enough", "enou2f"); // 2.0 only +065 txt = txt.replaceAll("^trough", "trou2f"); // 2.0 only +066 // note the spec says ^enough here again, c+p error I assume +067 txt = txt.replaceAll("^gn", "2n"); +068 +069 // End +070 txt = txt.replaceAll("mb$", "m2"); +071 +072 // 4. Handle replacements +073 txt = txt.replaceAll("cq", "2q"); +074 txt = txt.replaceAll("ci", "si"); +075 txt = txt.replaceAll("ce", "se"); +076 txt = txt.replaceAll("cy", "sy"); +077 txt = txt.replaceAll("tch", "2ch"); +078 txt = txt.replaceAll("c", "k"); +079 txt = txt.replaceAll("q", "k"); +080 txt = txt.replaceAll("x", "k"); +081 txt = txt.replaceAll("v", "f"); +082 txt = txt.replaceAll("dg", "2g"); +083 txt = txt.replaceAll("tio", "sio"); +084 txt = txt.replaceAll("tia", "sia"); +085 txt = txt.replaceAll("d", "t"); +086 txt = txt.replaceAll("ph", "fh"); +087 txt = txt.replaceAll("b", "p"); +088 txt = txt.replaceAll("sh", "s2"); +089 txt = txt.replaceAll("z", "s"); +090 txt = txt.replaceAll("^[aeiou]", "A"); +091 txt = txt.replaceAll("[aeiou]", "3"); +092 txt = txt.replaceAll("j", "y"); // 2.0 only +093 txt = txt.replaceAll("^y3", "Y3"); // 2.0 only +094 txt = txt.replaceAll("^y", "A"); // 2.0 only +095 txt = txt.replaceAll("y", "3"); // 2.0 only +096 txt = txt.replaceAll("3gh3", "3kh3"); +097 txt = txt.replaceAll("gh", "22"); +098 txt = txt.replaceAll("g", "k"); +099 txt = txt.replaceAll("s+", "S"); +100 txt = txt.replaceAll("t+", "T"); +101 txt = txt.replaceAll("p+", "P"); +102 txt = txt.replaceAll("k+", "K"); +103 txt = txt.replaceAll("f+", "F"); +104 txt 
= txt.replaceAll("m+", "M"); +105 txt = txt.replaceAll("n+", "N"); +106 txt = txt.replaceAll("w3", "W3"); +107 txt = txt.replaceAll("wh3", "Wh3"); +108 txt = txt.replaceAll("w$", "3"); // 2.0 only +109 txt = txt.replaceAll("w", "2"); +110 txt = txt.replaceAll("^h", "A"); +111 txt = txt.replaceAll("h", "2"); +112 txt = txt.replaceAll("r3", "R3"); +113 txt = txt.replaceAll("r$", "3"); // 2.0 only +114 txt = txt.replaceAll("r", "2"); +115 txt = txt.replaceAll("l3", "L3"); +116 txt = txt.replaceAll("l$", "3"); // 2.0 only +117 txt = txt.replaceAll("l", "2"); +118 +119 // 5. Handle removals +120 txt = txt.replaceAll("2", ""); +121 txt = txt.replaceAll("3$", "A"); // 2.0 only +122 txt = txt.replaceAll("3", ""); +123 +124 // 6. put ten 1s on the end +125 txt = txt + TEN_1; +126 +127 // 7. take the first ten characters as the code +128 return txt.substring(0, TEN_1.length()); +129 } +130 +131} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language;

import java.util.Locale;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;

/**
 * Encodes a string into a Cologne Phonetic value.
 * <p>
 * Implements the <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Kölner Phonetik</a> (Cologne
 * Phonetic) algorithm issued by Hans Joachim Postel in 1969.
 * </p>
 * <p>
 * The <i>Kölner Phonetik</i> is a phonetic algorithm which is optimized for the German language. It is related to
 * the well-known soundex algorithm.
 * </p>
 *
 * <h2>Algorithm</h2>
 *
 * <ul>
 *
 * <li>
 * <h3>Step 1:</h3>
 * After preprocessing (conversion to upper case, transcription of <a
 * href="http://en.wikipedia.org/wiki/Germanic_umlaut">germanic umlauts</a>, removal of non alphabetical characters) the
 * letters of the supplied text are replaced by their phonetic code according to the following table.
 * <table border="1">
 * <caption style="caption-side: bottom"><small><i>(Source: <a
 * href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik#Buchstabencodes">Wikipedia (de): Kölner Phonetik --
 * Buchstabencodes</a>)</i></small></caption> <tbody>
 * <tr>
 * <th>Letter</th>
 * <th>Context</th>
 * <th>Code</th>
 * </tr>
 * <tr>
 * <td>A, E, I, J, O, U, Y</td>
 * <td></td>
 * <td>0</td>
 * </tr>
 * <tr>
 *
 * <td>H</td>
 * <td></td>
 * <td>-</td>
 * </tr>
 * <tr>
 * <td>B</td>
 * <td></td>
 * <td rowspan="2">1</td>
 * </tr>
 * <tr>
 * <td>P</td>
 * <td>not before H</td>
 *
 * </tr>
 * <tr>
 * <td>D, T</td>
 * <td>not before C, S, Z</td>
 * <td>2</td>
 * </tr>
 * <tr>
 * <td>F, V, W</td>
 * <td></td>
 * <td rowspan="2">3</td>
 * </tr>
 * <tr>
 *
 * <td>P</td>
 * <td>before H</td>
 * </tr>
 * <tr>
 * <td>G, K, Q</td>
 * <td></td>
 * <td rowspan="3">4</td>
 * </tr>
 * <tr>
 * <td rowspan="2">C</td>
 * <td>at onset before A, H, K, L, O, Q, R, U, X</td>
 *
 * </tr>
 * <tr>
 * <td>before A, H, K, O, Q, U, X except after S, Z</td>
 * </tr>
 * <tr>
 * <td>X</td>
 * <td>not after C, K, Q</td>
 * <td>48</td>
 * </tr>
 * <tr>
 * <td>L</td>
 * <td></td>
 *
 * <td>5</td>
 * </tr>
 * <tr>
 * <td>M, N</td>
 * <td></td>
 * <td>6</td>
 * </tr>
 * <tr>
 * <td>R</td>
 * <td></td>
 * <td>7</td>
 * </tr>
 *
 * <tr>
 * <td>S, Z</td>
 * <td></td>
 * <td rowspan="6">8</td>
 * </tr>
 * <tr>
 * <td rowspan="3">C</td>
 * <td>after S, Z</td>
 * </tr>
 * <tr>
 * <td>at onset except before A, H, K, L, O, Q, R, U, X</td>
 * </tr>
 *
 * <tr>
 * <td>not before A, H, K, O, Q, U, X</td>
 * </tr>
 * <tr>
 * <td>D, T</td>
 * <td>before C, S, Z</td>
 * </tr>
 * <tr>
 * <td>X</td>
 * <td>after C, K, Q</td>
 * </tr>
 * </tbody>
 * </table>
 *
 * <h4>Example:</h4>
 *
 * <code>"M</code>ü<code>ller-L</code>ü<code>denscheidt"
 * =&gt; "MULLERLUDENSCHEIDT" =&gt; "6005507500206880022"</code>
 *
 * </li>
 *
 * <li>
 * <h3>Step 2:</h3>
 * Collapse of all multiple consecutive code digits.
 * <h4>Example:</h4>
 * <code>"6005507500206880022" =&gt; "6050750206802"</code></li>
 *
 * <li>
 * <h3>Step 3:</h3>
 * Removal of all codes "0" except at the beginning. This means that two or more identical consecutive digits can occur
 * if they occur after removing the "0" digits.
 *
 * <h4>Example:</h4>
 * <code>"6050750206802" =&gt; "65752682"</code></li>
 *
 * </ul>
 *
 * <p>
 * This class is thread-safe.
 * </p>
 *
 * @see <a href="http://de.wikipedia.org/wiki/K%C3%B6lner_Phonetik">Wikipedia (de): Kölner Phonetik (in German)</a>
 * @since 1.5
 */
public class ColognePhonetic implements StringEncoder {

    // Predefined char arrays for better performance and less GC load
    private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
    private static final char[] SCZ = new char[] { 'S', 'C', 'Z' };
    private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
    private static final char[] GKQ = new char[] { 'G', 'K', 'Q' };
    private static final char[] CKQ = new char[] { 'C', 'K', 'Q' };
    private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
    private static final char[] SZ = new char[] { 'S', 'Z' };
    private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
    private static final char[] TDX = new char[] { 'T', 'D', 'X' };

    /** Sentinel "last code" meaning that no character has been encoded yet. */
    private static final char CHAR_FIRST_POS = '/';

    /** Sentinel standing in for a neighbouring character that does not exist or must be ignored. */
    private static final char CHAR_IGNORE = '-';

    /**
     * Shared base of the input and output buffers used while encoding.
     * <p>
     * This class is not thread-safe; the field {@link #length} is mutable.
     * However, it is not shared between threads, as it is constructed on demand
     * by the method {@link ColognePhonetic#colognePhonetic(String)}
     * </p>
     */
    private abstract static class CologneBuffer {

        /** Backing storage; only {@link #length} characters of it are in use. */
        protected final char[] data;

        /** Number of characters currently held by the buffer. */
        protected int length = 0;

        /**
         * Wraps an existing array; the whole array counts as content.
         *
         * @param data the characters to wrap (not copied)
         */
        public CologneBuffer(final char[] data) {
            this.data = data;
            this.length = data.length;
        }

        /**
         * Creates an empty buffer with the given capacity.
         *
         * @param buffSize capacity of the backing array
         */
        public CologneBuffer(final int buffSize) {
            this.data = new char[buffSize];
            this.length = 0;
        }

        /**
         * Copies a slice of the buffer content into a new array.
         *
         * @param start offset of the first character, relative to the buffer content
         * @param length number of characters to copy
         * @return a new array holding the requested characters
         */
        protected abstract char[] copyData(int start, final int length);

        /**
         * @return the number of characters currently held by the buffer
         */
        public int length() {
            return length;
        }

        @Override
        public String toString() {
            return new String(copyData(0, length));
        }
    }

    /**
     * Output buffer: content starts at index 0 of the backing array and grows to the right.
     */
    private static final class CologneOutputBuffer extends CologneBuffer {

        public CologneOutputBuffer(final int buffSize) {
            super(buffSize);
        }

        /**
         * Appends one character at the end of the content.
         *
         * @param chr the character to append
         */
        public void addRight(final char chr) {
            data[length] = chr;
            length++;
        }

        @Override
        protected char[] copyData(final int start, final int length) {
            final char[] newData = new char[length];
            System.arraycopy(data, start, newData, 0, length);
            return newData;
        }
    }

    /**
     * Input buffer: the unread content is the last {@code length} characters of the backing array, so consuming a
     * character just shrinks {@code length} and pushing one back grows it again.
     */
    private static final class CologneInputBuffer extends CologneBuffer {

        public CologneInputBuffer(final char[] data) {
            super(data);
        }

        /**
         * Pushes a character in front of the unread content, reusing the slot of an already consumed character.
         *
         * @param ch the character to put in front
         */
        public void addLeft(final char ch) {
            length++;
            data[getNextPos()] = ch;
        }

        @Override
        protected char[] copyData(final int start, final int length) {
            final char[] newData = new char[length];
            System.arraycopy(data, data.length - this.length + start, newData, 0, length);
            return newData;
        }

        /**
         * @return the next unread character, without consuming it
         */
        public char getNextChar() {
            return data[getNextPos()];
        }

        /**
         * @return index in the backing array of the next unread character
         */
        protected int getNextPos() {
            return data.length - length;
        }

        /**
         * Consumes the next unread character.
         *
         * @return the character that was consumed
         */
        public char removeNext() {
            final char ch = getNextChar();
            length--;
            return ch;
        }
    }

    /*
     * Returns whether the array contains the key, or not.
     */
    private static boolean arrayContains(final char[] arr, final char key) {
        for (final char element : arr) {
            if (element == key) {
                return true;
            }
        }
        return false;
    }

    /**
     * <p>
     * Implements the <i>Kölner Phonetik</i> algorithm.
     * </p>
     * <p>
     * In contrast to the initial description of the algorithm, this implementation does the encoding in one pass.
     * </p>
     *
     * @param text The source text to encode
     * @return the corresponding encoding according to the <i>Kölner Phonetik</i> algorithm, or {@code null} if
     *         {@code text} is {@code null}
     */
    public String colognePhonetic(String text) {
        if (text == null) {
            return null;
        }

        final CologneInputBuffer input = new CologneInputBuffer(preprocess(text));
        // 'X' can produce two codes ("48"), so the output may need up to twice the input length.
        final CologneOutputBuffer output = new CologneOutputBuffer(input.length() * 2);

        char nextChar;

        char lastChar = CHAR_IGNORE;
        char lastCode = CHAR_FIRST_POS;
        char code;
        char chr;

        while (input.length() > 0) {
            chr = input.removeNext();

            if (input.length() > 0) {
                nextChar = input.getNextChar();
            } else {
                nextChar = CHAR_IGNORE;
            }

            // OK to ignore H here because it only affects nextChar which has already been set up
            if (chr == 'H' || chr < 'A' || chr > 'Z') {
                continue; // ignore unwanted characters
            }

            if (arrayContains(AEIJOUY, chr)) {
                code = '0';
            } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
                code = '1';
            } else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
                code = '2';
            } else if (arrayContains(WFPV, chr)) {
                code = '3';
            } else if (arrayContains(GKQ, chr)) {
                code = '4';
            } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
                // "48": emit the 4 now and push an 'S' back so the next iteration emits the 8
                code = '4';
                input.addLeft('S');
            } else if (chr == 'S' || chr == 'Z') {
                code = '8';
            } else if (chr == 'C') {
                if (lastCode == CHAR_FIRST_POS) {
                    if (arrayContains(AHKLOQRUX, nextChar)) {
                        code = '4';
                    } else {
                        code = '8';
                    }
                } else {
                    if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) {
                        code = '8';
                    } else {
                        code = '4';
                    }
                }
            } else if (arrayContains(TDX, chr)) {
                code = '8';
            } else if (chr == 'R') {
                code = '7';
            } else if (chr == 'L') {
                code = '5';
            } else if (chr == 'M' || chr == 'N') {
                code = '6';
            } else {
                code = chr; // should not happen?
            }

            // Skip repeated codes, and skip '0' everywhere except as the very first code.
            if (code != CHAR_IGNORE && (lastCode != code && (code != '0' || lastCode == CHAR_FIRST_POS) || code < '0' || code > '8')) {
                output.addRight(code);
            }

            lastChar = chr;
            lastCode = code;
        }
        return output.toString();
    }

    /**
     * Encodes an Object using the Cologne Phonetic algorithm. Exists to satisfy the Encoder interface.
     *
     * @param object the object to encode; must be a {@code java.lang.String}
     * @return the encoding of the supplied String
     * @throws EncoderException if the parameter is {@code null} or not of type {@code java.lang.String}
     */
    @Override
    public Object encode(final Object object) throws EncoderException {
        if (!(object instanceof String)) {
            // instanceof is false for null, so guard the getClass() call to report null
            // through the declared EncoderException instead of a NullPointerException.
            final String actualType = object == null ? "null" : object.getClass().getName();
            throw new EncoderException("This method's parameter was expected to be of the type " +
                    String.class.getName() +
                    ". But actually it was of the type " +
                    actualType +
                    ".");
        }
        return encode((String) object);
    }

    /**
     * Encodes a String using the Cologne Phonetic algorithm.
     *
     * @param text the text to encode
     * @return the encoding, or {@code null} if {@code text} is {@code null}
     */
    @Override
    public String encode(final String text) {
        return colognePhonetic(text);
    }

    /**
     * Tests whether two texts have the same Cologne Phonetic encoding.
     *
     * @param text1 first text to compare; must not be {@code null}
     * @param text2 second text to compare
     * @return {@code true} if both encodings are equal, {@code false} otherwise (including when only
     *         {@code text2} is {@code null})
     * @throws NullPointerException if {@code text1} is {@code null}, because its encoding is then {@code null}
     */
    public boolean isEncodeEqual(final String text1, final String text2) {
        return colognePhonetic(text1).equals(colognePhonetic(text2));
    }

    /**
     * Converts the string to upper case and maps some Germanic characters to plain letters for internal processing.
     * The following characters are mapped:
     * <ul>
     * <li>capital a, umlaut mark</li>
     * <li>capital u, umlaut mark</li>
     * <li>capital o, umlaut mark</li>
     * <li>small sharp s, German (turned into SS by the upper-casing)</li>
     * </ul>
     */
    private char[] preprocess(String text) {
        // This converts German small sharp s (Eszett) to SS
        final char[] chrs = text.toUpperCase(Locale.GERMAN).toCharArray();

        for (int index = 0; index < chrs.length; index++) {
            switch (chrs[index]) {
                case '\u00C4': // capital A, umlaut mark
                    chrs[index] = 'A';
                    break;
                case '\u00DC': // capital U, umlaut mark
                    chrs[index] = 'U';
                    break;
                case '\u00D6': // capital O, umlaut mark
                    chrs[index] = 'O';
                    break;
                default:
                    break;
            }
        }
        return chrs;
    }
}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.language; +018 +019import java.io.InputStream; +020import java.util.ArrayList; +021import java.util.Arrays; +022import java.util.Collections; +023import java.util.Comparator; +024import java.util.HashMap; +025import java.util.LinkedHashSet; +026import java.util.List; +027import java.util.Map; +028import java.util.Scanner; +029import java.util.Set; +030 +031import org.apache.commons.codec.CharEncoding; +032import org.apache.commons.codec.EncoderException; +033import org.apache.commons.codec.StringEncoder; +034 +035/** +036 * Encodes a string into a Daitch-Mokotoff Soundex value. +037 * <p> +038 * The Daitch-Mokotoff Soundex algorithm is a refinement of the Russel and American Soundex algorithms, yielding greater +039 * accuracy in matching especially Slavish and Yiddish surnames with similar pronunciation but differences in spelling. 
+040 * </p> +041 * <p> +042 * The main differences compared to the other soundex variants are: +043 * </p> +044 * <ul> +045 * <li>coded names are 6 digits long +046 * <li>the initial character of the name is coded +047 * <li>rules to encoded multi-character n-grams +048 * <li>multiple possible encodings for the same name (branching) +049 * </ul> +050 * <p> +051 * This implementation supports branching, depending on the used method: +052 * <ul> +053 * <li>{@link #encode(String)} - branching disabled, only the first code will be returned +054 * <li>{@link #soundex(String)} - branching enabled, all codes will be returned, separated by '|' +055 * </ul> +056 * <p> +057 * Note: this implementation has additional branching rules compared to the original description of the algorithm. The +058 * rules can be customized by overriding the default rules contained in the resource file +059 * {@code org/apache/commons/codec/language/dmrules.txt}. +060 * </p> +061 * <p> +062 * This class is thread-safe. +063 * </p> +064 * +065 * @see Soundex +066 * @see <a href="http://en.wikipedia.org/wiki/Daitch%E2%80%93Mokotoff_Soundex"> Wikipedia - Daitch-Mokotoff Soundex</a> +067 * @see <a href="http://www.avotaynu.com/soundex.htm">Avotaynu - Soundexing and Genealogy</a> +068 * +069 * @version $Id$ +070 * @since 1.10 +071 */ +072public class DaitchMokotoffSoundex implements StringEncoder { +073 +074 /** +075 * Inner class representing a branch during DM soundex encoding. +076 */ +077 private static final class Branch { +078 private final StringBuilder builder; +079 private String cachedString; +080 private String lastReplacement; +081 +082 private Branch() { +083 builder = new StringBuilder(); +084 lastReplacement = null; +085 cachedString = null; +086 } +087 +088 /** +089 * Creates a new branch, identical to this branch. 
+090 * +091 * @return a new, identical branch +092 */ +093 public Branch createBranch() { +094 final Branch branch = new Branch(); +095 branch.builder.append(toString()); +096 branch.lastReplacement = this.lastReplacement; +097 return branch; +098 } +099 +100 @Override +101 public boolean equals(final Object other) { +102 if (this == other) { +103 return true; +104 } +105 if (!(other instanceof Branch)) { +106 return false; +107 } +108 +109 return toString().equals(((Branch) other).toString()); +110 } +111 +112 /** +113 * Finish this branch by appending '0's until the maximum code length has been reached. +114 */ +115 public void finish() { +116 while (builder.length() < MAX_LENGTH) { +117 builder.append('0'); +118 cachedString = null; +119 } +120 } +121 +122 @Override +123 public int hashCode() { +124 return toString().hashCode(); +125 } +126 +127 /** +128 * Process the next replacement to be added to this branch. +129 * +130 * @param replacement +131 * the next replacement to append +132 * @param forceAppend +133 * indicates if the default processing shall be overridden +134 */ +135 public void processNextReplacement(final String replacement, final boolean forceAppend) { +136 final boolean append = lastReplacement == null || !lastReplacement.endsWith(replacement) || forceAppend; +137 +138 if (append && builder.length() < MAX_LENGTH) { +139 builder.append(replacement); +140 // remove all characters after the maximum length +141 if (builder.length() > MAX_LENGTH) { +142 builder.delete(MAX_LENGTH, builder.length()); +143 } +144 cachedString = null; +145 } +146 +147 lastReplacement = replacement; +148 } +149 +150 @Override +151 public String toString() { +152 if (cachedString == null) { +153 cachedString = builder.toString(); +154 } +155 return cachedString; +156 } +157 } +158 +159 /** +160 * Inner class for storing rules. 
+161 */ +162 private static final class Rule { +163 private final String pattern; +164 private final String[] replacementAtStart; +165 private final String[] replacementBeforeVowel; +166 private final String[] replacementDefault; +167 +168 protected Rule(final String pattern, final String replacementAtStart, final String replacementBeforeVowel, +169 final String replacementDefault) { +170 this.pattern = pattern; +171 this.replacementAtStart = replacementAtStart.split("\\|"); +172 this.replacementBeforeVowel = replacementBeforeVowel.split("\\|"); +173 this.replacementDefault = replacementDefault.split("\\|"); +174 } +175 +176 public int getPatternLength() { +177 return pattern.length(); +178 } +179 +180 public String[] getReplacements(final String context, final boolean atStart) { +181 if (atStart) { +182 return replacementAtStart; +183 } +184 +185 final int nextIndex = getPatternLength(); +186 final boolean nextCharIsVowel = nextIndex < context.length() ? isVowel(context.charAt(nextIndex)) : false; +187 if (nextCharIsVowel) { +188 return replacementBeforeVowel; +189 } +190 +191 return replacementDefault; +192 } +193 +194 private boolean isVowel(final char ch) { +195 return ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u'; +196 } +197 +198 public boolean matches(final String context) { +199 return context.startsWith(pattern); +200 } +201 +202 @Override +203 public String toString() { +204 return String.format("%s=(%s,%s,%s)", pattern, Arrays.asList(replacementAtStart), +205 Arrays.asList(replacementBeforeVowel), Arrays.asList(replacementDefault)); +206 } +207 } +208 +209 private static final String COMMENT = "//"; +210 private static final String DOUBLE_QUOTE = "\""; +211 +212 private static final String MULTILINE_COMMENT_END = "*/"; +213 +214 private static final String MULTILINE_COMMENT_START = "/*"; +215 +216 /** The resource file containing the replacement and folding rules */ +217 private static final String RESOURCE_FILE = 
"org/apache/commons/codec/language/dmrules.txt"; +218 +219 /** The code length of a DM soundex value. */ +220 private static final int MAX_LENGTH = 6; +221 +222 /** Transformation rules indexed by the first character of their pattern. */ +223 private static final Map<Character, List<Rule>> RULES = new HashMap<>(); +224 +225 /** Folding rules. */ +226 private static final Map<Character, Character> FOLDINGS = new HashMap<>(); +227 +228 static { +229 final InputStream rulesIS = DaitchMokotoffSoundex.class.getClassLoader().getResourceAsStream(RESOURCE_FILE); +230 if (rulesIS == null) { +231 throw new IllegalArgumentException("Unable to load resource: " + RESOURCE_FILE); +232 } +233 +234 try (final Scanner scanner = new Scanner(rulesIS, CharEncoding.UTF_8)) { +235 parseRules(scanner, RESOURCE_FILE, RULES, FOLDINGS); +236 } +237 +238 // sort RULES by pattern length in descending order +239 for (final Map.Entry<Character, List<Rule>> rule : RULES.entrySet()) { +240 final List<Rule> ruleList = rule.getValue(); +241 Collections.sort(ruleList, new Comparator<Rule>() { +242 @Override +243 public int compare(final Rule rule1, final Rule rule2) { +244 return rule2.getPatternLength() - rule1.getPatternLength(); +245 } +246 }); +247 } +248 } +249 +250 private static void parseRules(final Scanner scanner, final String location, +251 final Map<Character, List<Rule>> ruleMapping, final Map<Character, Character> asciiFoldings) { +252 int currentLine = 0; +253 boolean inMultilineComment = false; +254 +255 while (scanner.hasNextLine()) { +256 currentLine++; +257 final String rawLine = scanner.nextLine(); +258 String line = rawLine; +259 +260 if (inMultilineComment) { +261 if (line.endsWith(MULTILINE_COMMENT_END)) { +262 inMultilineComment = false; +263 } +264 continue; +265 } +266 +267 if (line.startsWith(MULTILINE_COMMENT_START)) { +268 inMultilineComment = true; +269 } else { +270 // discard comments +271 final int cmtI = line.indexOf(COMMENT); +272 if (cmtI >= 0) { +273 line = 
line.substring(0, cmtI); +274 } +275 +276 // trim leading-trailing whitespace +277 line = line.trim(); +278 +279 if (line.length() == 0) { +280 continue; // empty lines can be safely skipped +281 } +282 +283 if (line.contains("=")) { +284 // folding +285 final String[] parts = line.split("="); +286 if (parts.length != 2) { +287 throw new IllegalArgumentException("Malformed folding statement split into " + parts.length + +288 " parts: " + rawLine + " in " + location); +289 } +290 final String leftCharacter = parts[0]; +291 final String rightCharacter = parts[1]; +292 +293 if (leftCharacter.length() != 1 || rightCharacter.length() != 1) { +294 throw new IllegalArgumentException("Malformed folding statement - " + +295 "patterns are not single characters: " + rawLine + " in " + location); +296 } +297 +298 asciiFoldings.put(leftCharacter.charAt(0), rightCharacter.charAt(0)); +299 } else { +300 // rule +301 final String[] parts = line.split("\\s+"); +302 if (parts.length != 4) { +303 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +304 " parts: " + rawLine + " in " + location); +305 } +306 try { +307 final String pattern = stripQuotes(parts[0]); +308 final String replacement1 = stripQuotes(parts[1]); +309 final String replacement2 = stripQuotes(parts[2]); +310 final String replacement3 = stripQuotes(parts[3]); +311 +312 final Rule r = new Rule(pattern, replacement1, replacement2, replacement3); +313 final char patternKey = r.pattern.charAt(0); +314 List<Rule> rules = ruleMapping.get(patternKey); +315 if (rules == null) { +316 rules = new ArrayList<>(); +317 ruleMapping.put(patternKey, rules); +318 } +319 rules.add(r); +320 } catch (final IllegalArgumentException e) { +321 throw new IllegalStateException( +322 "Problem parsing line '" + currentLine + "' in " + location, e); +323 } +324 } +325 } +326 } +327 } +328 +329 private static String stripQuotes(String str) { +330 if (str.startsWith(DOUBLE_QUOTE)) { +331 str = 
str.substring(1); +332 } +333 +334 if (str.endsWith(DOUBLE_QUOTE)) { +335 str = str.substring(0, str.length() - 1); +336 } +337 +338 return str; +339 } +340 +341 /** Whether to use ASCII folding prior to encoding. */ +342 private final boolean folding; +343 +344 /** +345 * Creates a new instance with ASCII-folding enabled. +346 */ +347 public DaitchMokotoffSoundex() { +348 this(true); +349 } +350 +351 /** +352 * Creates a new instance. +353 * <p> +354 * With ASCII-folding enabled, certain accented characters will be transformed to equivalent ASCII characters, e.g. +355 * è -> e. +356 * </p> +357 * +358 * @param folding +359 * if ASCII-folding shall be performed before encoding +360 */ +361 public DaitchMokotoffSoundex(final boolean folding) { +362 this.folding = folding; +363 } +364 +365 /** +366 * Performs a cleanup of the input string before the actual soundex transformation. +367 * <p> +368 * Removes all whitespace characters and performs ASCII folding if enabled. +369 * </p> +370 * +371 * @param input +372 * the input string to cleanup +373 * @return a cleaned up string +374 */ +375 private String cleanup(final String input) { +376 final StringBuilder sb = new StringBuilder(); +377 for (char ch : input.toCharArray()) { +378 if (Character.isWhitespace(ch)) { +379 continue; +380 } +381 +382 ch = Character.toLowerCase(ch); +383 if (folding && FOLDINGS.containsKey(ch)) { +384 ch = FOLDINGS.get(ch); +385 } +386 sb.append(ch); +387 } +388 return sb.toString(); +389 } +390 +391 /** +392 * Encodes an Object using the Daitch-Mokotoff soundex algorithm without branching. +393 * <p> +394 * This method is provided in order to satisfy the requirements of the Encoder interface, and will throw an +395 * EncoderException if the supplied object is not of type java.lang.String. 
+396 * </p> +397 * +398 * @see #soundex(String) +399 * +400 * @param obj +401 * Object to encode +402 * @return An object (of type java.lang.String) containing the DM soundex code, which corresponds to the String +403 * supplied. +404 * @throws EncoderException +405 * if the parameter supplied is not of type java.lang.String +406 * @throws IllegalArgumentException +407 * if a character is not mapped +408 */ +409 @Override +410 public Object encode(final Object obj) throws EncoderException { +411 if (!(obj instanceof String)) { +412 throw new EncoderException( +413 "Parameter supplied to DaitchMokotoffSoundex encode is not of type java.lang.String"); +414 } +415 return encode((String) obj); +416 } +417 +418 /** +419 * Encodes a String using the Daitch-Mokotoff soundex algorithm without branching. +420 * +421 * @see #soundex(String) +422 * +423 * @param source +424 * A String object to encode +425 * @return A DM Soundex code corresponding to the String supplied +426 * @throws IllegalArgumentException +427 * if a character is not mapped +428 */ +429 @Override +430 public String encode(final String source) { +431 if (source == null) { +432 return null; +433 } +434 return soundex(source, false)[0]; +435 } +436 +437 /** +438 * Encodes a String using the Daitch-Mokotoff soundex algorithm with branching. +439 * <p> +440 * In case a string is encoded into multiple codes (see branching rules), the result will contain all codes, +441 * separated by '|'. +442 * </p> +443 * <p> +444 * Example: the name "AUERBACH" is encoded as both +445 * </p> +446 * <ul> +447 * <li>097400</li> +448 * <li>097500</li> +449 * </ul> +450 * <p> +451 * Thus the result will be "097400|097500". 
+452 * </p> +453 * +454 * @param source +455 * A String object to encode +456 * @return A string containing a set of DM Soundex codes corresponding to the String supplied +457 * @throws IllegalArgumentException +458 * if a character is not mapped +459 */ +460 public String soundex(final String source) { +461 final String[] branches = soundex(source, true); +462 final StringBuilder sb = new StringBuilder(); +463 int index = 0; +464 for (final String branch : branches) { +465 sb.append(branch); +466 if (++index < branches.length) { +467 sb.append('|'); +468 } +469 } +470 return sb.toString(); +471 } +472 +473 /** +474 * Perform the actual DM Soundex algorithm on the input string. +475 * +476 * @param source +477 * A String object to encode +478 * @param branching +479 * If branching shall be performed +480 * @return A string array containing all DM Soundex codes corresponding to the String supplied depending on the +481 * selected branching mode +482 */ +483 private String[] soundex(final String source, final boolean branching) { +484 if (source == null) { +485 return null; +486 } +487 +488 final String input = cleanup(source); +489 +490 final Set<Branch> currentBranches = new LinkedHashSet<>(); +491 currentBranches.add(new Branch()); +492 +493 char lastChar = '\0'; +494 for (int index = 0; index < input.length(); index++) { +495 final char ch = input.charAt(index); +496 +497 // ignore whitespace inside a name +498 if (Character.isWhitespace(ch)) { +499 continue; +500 } +501 +502 final String inputContext = input.substring(index); +503 final List<Rule> rules = RULES.get(ch); +504 if (rules == null) { +505 continue; +506 } +507 +508 // use an EMPTY_LIST to avoid false positive warnings wrt potential null pointer access +509 @SuppressWarnings("unchecked") +510 final List<Branch> nextBranches = branching ? 
new ArrayList<Branch>() : Collections.EMPTY_LIST; +511 +512 for (final Rule rule : rules) { +513 if (rule.matches(inputContext)) { +514 if (branching) { +515 nextBranches.clear(); +516 } +517 final String[] replacements = rule.getReplacements(inputContext, lastChar == '\0'); +518 final boolean branchingRequired = replacements.length > 1 && branching; +519 +520 for (final Branch branch : currentBranches) { +521 for (final String nextReplacement : replacements) { +522 // if we have multiple replacements, always create a new branch +523 final Branch nextBranch = branchingRequired ? branch.createBranch() : branch; +524 +525 // special rule: occurrences of mn or nm are treated differently +526 final boolean force = (lastChar == 'm' && ch == 'n') || (lastChar == 'n' && ch == 'm'); +527 +528 nextBranch.processNextReplacement(nextReplacement, force); +529 +530 if (branching) { +531 nextBranches.add(nextBranch); +532 } else { +533 break; +534 } +535 } +536 } +537 +538 if (branching) { +539 currentBranches.clear(); +540 currentBranches.addAll(nextBranches); +541 } +542 index += rule.getPatternLength() - 1; +543 break; +544 } +545 } +546 +547 lastChar = ch; +548 } +549 +550 final String[] result = new String[currentBranches.size()]; +551 int index = 0; +552 for (final Branch branch : currentBranches) { +553 branch.finish(); +554 result[index++] = branch.toString(); +555 } +556 +557 return result; +558 } +559} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022import org.apache.commons.codec.binary.StringUtils; +023 +024/** +025 * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence +026 * Philips</CITE>. +027 * <p> +028 * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable +029 * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is +030 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication +031 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup. 
+032 * +033 * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a> +034 * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a> +035 * +036 * @version $Id$ +037 */ +038public class DoubleMetaphone implements StringEncoder { +039 +040 /** +041 * "Vowels" to test for +042 */ +043 private static final String VOWELS = "AEIOUY"; +044 +045 /** +046 * Prefixes when present which are not pronounced +047 */ +048 private static final String[] SILENT_START = +049 { "GN", "KN", "PN", "WR", "PS" }; +050 private static final String[] L_R_N_M_B_H_F_V_W_SPACE = +051 { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " }; +052 private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = +053 { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" }; +054 private static final String[] L_T_K_S_N_M_B_Z = +055 { "L", "T", "K", "S", "N", "M", "B", "Z" }; +056 +057 /** +058 * Maximum length of an encoding, default is 4 +059 */ +060 private int maxCodeLen = 4; +061 +062 /** +063 * Creates an instance of this DoubleMetaphone encoder +064 */ +065 public DoubleMetaphone() { +066 super(); +067 } +068 +069 /** +070 * Encode a value with Double Metaphone. +071 * +072 * @param value String to encode +073 * @return an encoded string +074 */ +075 public String doubleMetaphone(final String value) { +076 return doubleMetaphone(value, false); +077 } +078 +079 /** +080 * Encode a value with Double Metaphone, optionally using the alternate encoding. +081 * +082 * @param value String to encode +083 * @param alternate use alternate encode +084 * @return an encoded string +085 */ +086 public String doubleMetaphone(String value, final boolean alternate) { +087 value = cleanInput(value); +088 if (value == null) { +089 return null; +090 } +091 +092 final boolean slavoGermanic = isSlavoGermanic(value); +093 int index = isSilentStart(value) ? 
1 : 0; +094 +095 final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen()); +096 +097 while (!result.isComplete() && index <= value.length() - 1) { +098 switch (value.charAt(index)) { +099 case 'A': +100 case 'E': +101 case 'I': +102 case 'O': +103 case 'U': +104 case 'Y': +105 index = handleAEIOUY(result, index); +106 break; +107 case 'B': +108 result.append('P'); +109 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1; +110 break; +111 case '\u00C7': +112 // A C with a Cedilla +113 result.append('S'); +114 index++; +115 break; +116 case 'C': +117 index = handleC(value, result, index); +118 break; +119 case 'D': +120 index = handleD(value, result, index); +121 break; +122 case 'F': +123 result.append('F'); +124 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1; +125 break; +126 case 'G': +127 index = handleG(value, result, index, slavoGermanic); +128 break; +129 case 'H': +130 index = handleH(value, result, index); +131 break; +132 case 'J': +133 index = handleJ(value, result, index, slavoGermanic); +134 break; +135 case 'K': +136 result.append('K'); +137 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1; +138 break; +139 case 'L': +140 index = handleL(value, result, index); +141 break; +142 case 'M': +143 result.append('M'); +144 index = conditionM0(value, index) ? index + 2 : index + 1; +145 break; +146 case 'N': +147 result.append('N'); +148 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1; +149 break; +150 case '\u00D1': +151 // N with a tilde (spanish ene) +152 result.append('N'); +153 index++; +154 break; +155 case 'P': +156 index = handleP(value, result, index); +157 break; +158 case 'Q': +159 result.append('K'); +160 index = charAt(value, index + 1) == 'Q' ? 
index + 2 : index + 1; +161 break; +162 case 'R': +163 index = handleR(value, result, index, slavoGermanic); +164 break; +165 case 'S': +166 index = handleS(value, result, index, slavoGermanic); +167 break; +168 case 'T': +169 index = handleT(value, result, index); +170 break; +171 case 'V': +172 result.append('F'); +173 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1; +174 break; +175 case 'W': +176 index = handleW(value, result, index); +177 break; +178 case 'X': +179 index = handleX(value, result, index); +180 break; +181 case 'Z': +182 index = handleZ(value, result, index, slavoGermanic); +183 break; +184 default: +185 index++; +186 break; +187 } +188 } +189 +190 return alternate ? result.getAlternate() : result.getPrimary(); +191 } +192 +193 /** +194 * Encode the value using DoubleMetaphone. It will only work if +195 * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>). +196 * +197 * @param obj Object to encode (should be of type String) +198 * @return An encoded Object (will be of type String) +199 * @throws EncoderException encode parameter is not of type String +200 */ +201 @Override +202 public Object encode(final Object obj) throws EncoderException { +203 if (!(obj instanceof String)) { +204 throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); +205 } +206 return doubleMetaphone((String) obj); +207 } +208 +209 /** +210 * Encode the value using DoubleMetaphone. +211 * +212 * @param value String to encode +213 * @return An encoded String +214 */ +215 @Override +216 public String encode(final String value) { +217 return doubleMetaphone(value); +218 } +219 +220 /** +221 * Check if the Double Metaphone values of two <code>String</code> values +222 * are equal. +223 * +224 * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. +225 * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. 
+226 * @return <code>true</code> if the encoded <code>String</code>s are equal; +227 * <code>false</code> otherwise. +228 * @see #isDoubleMetaphoneEqual(String,String,boolean) +229 */ +230 public boolean isDoubleMetaphoneEqual(final String value1, final String value2) { +231 return isDoubleMetaphoneEqual(value1, value2, false); +232 } +233 +234 /** +235 * Check if the Double Metaphone values of two <code>String</code> values +236 * are equal, optionally using the alternate value. +237 * +238 * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. +239 * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. +240 * @param alternate use the alternate value if <code>true</code>. +241 * @return <code>true</code> if the encoded <code>String</code>s are equal; +242 * <code>false</code> otherwise. +243 */ +244 public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) { +245 return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate)); +246 } +247 +248 /** +249 * Returns the maxCodeLen. +250 * @return int +251 */ +252 public int getMaxCodeLen() { +253 return this.maxCodeLen; +254 } +255 +256 /** +257 * Sets the maxCodeLen. +258 * @param maxCodeLen The maxCodeLen to set +259 */ +260 public void setMaxCodeLen(final int maxCodeLen) { +261 this.maxCodeLen = maxCodeLen; +262 } +263 +264 //-- BEGIN HANDLERS --// +265 +266 /** +267 * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases. +268 */ +269 private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) { +270 if (index == 0) { +271 result.append('A'); +272 } +273 return index + 1; +274 } +275 +276 /** +277 * Handles 'C' cases. 
+278 */ +279 private int handleC(final String value, final DoubleMetaphoneResult result, int index) { +280 if (conditionC0(value, index)) { // very confusing, moved out +281 result.append('K'); +282 index += 2; +283 } else if (index == 0 && contains(value, index, 6, "CAESAR")) { +284 result.append('S'); +285 index += 2; +286 } else if (contains(value, index, 2, "CH")) { +287 index = handleCH(value, result, index); +288 } else if (contains(value, index, 2, "CZ") && +289 !contains(value, index - 2, 4, "WICZ")) { +290 //-- "Czerny" --// +291 result.append('S', 'X'); +292 index += 2; +293 } else if (contains(value, index + 1, 3, "CIA")) { +294 //-- "focaccia" --// +295 result.append('X'); +296 index += 3; +297 } else if (contains(value, index, 2, "CC") && +298 !(index == 1 && charAt(value, 0) == 'M')) { +299 //-- double "cc" but not "McClelland" --// +300 return handleCC(value, result, index); +301 } else if (contains(value, index, 2, "CK", "CG", "CQ")) { +302 result.append('K'); +303 index += 2; +304 } else if (contains(value, index, 2, "CI", "CE", "CY")) { +305 //-- Italian vs. English --// +306 if (contains(value, index, 3, "CIO", "CIE", "CIA")) { +307 result.append('S', 'X'); +308 } else { +309 result.append('S'); +310 } +311 index += 2; +312 } else { +313 result.append('K'); +314 if (contains(value, index + 1, 2, " C", " Q", " G")) { +315 //-- Mac Caffrey, Mac Gregor --// +316 index += 3; +317 } else if (contains(value, index + 1, 1, "C", "K", "Q") && +318 !contains(value, index + 1, 2, "CE", "CI")) { +319 index += 2; +320 } else { +321 index++; +322 } +323 } +324 +325 return index; +326 } +327 +328 /** +329 * Handles 'CC' cases. 
+330 */ +331 private int handleCC(final String value, final DoubleMetaphoneResult result, int index) { +332 if (contains(value, index + 2, 1, "I", "E", "H") && +333 !contains(value, index + 2, 2, "HU")) { +334 //-- "bellocchio" but not "bacchus" --// +335 if ((index == 1 && charAt(value, index - 1) == 'A') || +336 contains(value, index - 1, 5, "UCCEE", "UCCES")) { +337 //-- "accident", "accede", "succeed" --// +338 result.append("KS"); +339 } else { +340 //-- "bacci", "bertucci", other Italian --// +341 result.append('X'); +342 } +343 index += 3; +344 } else { // Pierce's rule +345 result.append('K'); +346 index += 2; +347 } +348 +349 return index; +350 } +351 +352 /** +353 * Handles 'CH' cases. +354 */ +355 private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) { +356 if (index > 0 && contains(value, index, 4, "CHAE")) { // Michael +357 result.append('K', 'X'); +358 return index + 2; +359 } else if (conditionCH0(value, index)) { +360 //-- Greek roots ("chemistry", "chorus", etc.) --// +361 result.append('K'); +362 return index + 2; +363 } else if (conditionCH1(value, index)) { +364 //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --// +365 result.append('K'); +366 return index + 2; +367 } else { +368 if (index > 0) { +369 if (contains(value, 0, 2, "MC")) { +370 result.append('K'); +371 } else { +372 result.append('X', 'K'); +373 } +374 } else { +375 result.append('X'); +376 } +377 return index + 2; +378 } +379 } +380 +381 /** +382 * Handles 'D' cases. 
+383 */ +384 private int handleD(final String value, final DoubleMetaphoneResult result, int index) { +385 if (contains(value, index, 2, "DG")) { +386 //-- "Edge" --// +387 if (contains(value, index + 2, 1, "I", "E", "Y")) { +388 result.append('J'); +389 index += 3; +390 //-- "Edgar" --// +391 } else { +392 result.append("TK"); +393 index += 2; +394 } +395 } else if (contains(value, index, 2, "DT", "DD")) { +396 result.append('T'); +397 index += 2; +398 } else { +399 result.append('T'); +400 index++; +401 } +402 return index; +403 } +404 +405 /** +406 * Handles 'G' cases. +407 */ +408 private int handleG(final String value, final DoubleMetaphoneResult result, int index, +409 final boolean slavoGermanic) { +410 if (charAt(value, index + 1) == 'H') { +411 index = handleGH(value, result, index); +412 } else if (charAt(value, index + 1) == 'N') { +413 if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) { +414 result.append("KN", "N"); +415 } else if (!contains(value, index + 2, 2, "EY") && +416 charAt(value, index + 1) != 'Y' && !slavoGermanic) { +417 result.append("N", "KN"); +418 } else { +419 result.append("KN"); +420 } +421 index = index + 2; +422 } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) { +423 result.append("KL", "L"); +424 index += 2; +425 } else if (index == 0 && +426 (charAt(value, index + 1) == 'Y' || +427 contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) { +428 //-- -ges-, -gep-, -gel-, -gie- at beginning --// +429 result.append('K', 'J'); +430 index += 2; +431 } else if ((contains(value, index + 1, 2, "ER") || +432 charAt(value, index + 1) == 'Y') && +433 !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") && +434 !contains(value, index - 1, 1, "E", "I") && +435 !contains(value, index - 1, 3, "RGY", "OGY")) { +436 //-- -ger-, -gy- --// +437 result.append('K', 'J'); +438 index += 2; +439 } else if (contains(value, index + 1, 1, "E", "I", "Y") || +440 contains(value, index - 1, 4, "AGGI", "OGGI")) { +441 
//-- Italian "biaggi" --// +442 if (contains(value, 0 ,4, "VAN ", "VON ") || +443 contains(value, 0, 3, "SCH") || +444 contains(value, index + 1, 2, "ET")) { +445 //-- obvious germanic --// +446 result.append('K'); +447 } else if (contains(value, index + 1, 3, "IER")) { +448 result.append('J'); +449 } else { +450 result.append('J', 'K'); +451 } +452 index += 2; +453 } else if (charAt(value, index + 1) == 'G') { +454 index += 2; +455 result.append('K'); +456 } else { +457 index++; +458 result.append('K'); +459 } +460 return index; +461 } +462 +463 /** +464 * Handles 'GH' cases. +465 */ +466 private int handleGH(final String value, final DoubleMetaphoneResult result, int index) { +467 if (index > 0 && !isVowel(charAt(value, index - 1))) { +468 result.append('K'); +469 index += 2; +470 } else if (index == 0) { +471 if (charAt(value, index + 2) == 'I') { +472 result.append('J'); +473 } else { +474 result.append('K'); +475 } +476 index += 2; +477 } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) || +478 (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) || +479 (index > 3 && contains(value, index - 4, 1, "B", "H"))) { +480 //-- Parker's rule (with some further refinements) - "hugh" +481 index += 2; +482 } else { +483 if (index > 2 && charAt(value, index - 1) == 'U' && +484 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) { +485 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough" +486 result.append('F'); +487 } else if (index > 0 && charAt(value, index - 1) != 'I') { +488 result.append('K'); +489 } +490 index += 2; +491 } +492 return index; +493 } +494 +495 /** +496 * Handles 'H' cases. 
+497 */ +498 private int handleH(final String value, final DoubleMetaphoneResult result, int index) { +499 //-- only keep if first & before vowel or between 2 vowels --// +500 if ((index == 0 || isVowel(charAt(value, index - 1))) && +501 isVowel(charAt(value, index + 1))) { +502 result.append('H'); +503 index += 2; +504 //-- also takes car of "HH" --// +505 } else { +506 index++; +507 } +508 return index; +509 } +510 +511 /** +512 * Handles 'J' cases. +513 */ +514 private int handleJ(final String value, final DoubleMetaphoneResult result, int index, +515 final boolean slavoGermanic) { +516 if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) { +517 //-- obvious Spanish, "Jose", "San Jacinto" --// +518 if ((index == 0 && (charAt(value, index + 4) == ' ') || +519 value.length() == 4) || contains(value, 0, 4, "SAN ")) { +520 result.append('H'); +521 } else { +522 result.append('J', 'H'); +523 } +524 index++; +525 } else { +526 if (index == 0 && !contains(value, index, 4, "JOSE")) { +527 result.append('J', 'A'); +528 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && +529 (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) { +530 result.append('J', 'H'); +531 } else if (index == value.length() - 1) { +532 result.append('J', ' '); +533 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && +534 !contains(value, index - 1, 1, "S", "K", "L")) { +535 result.append('J'); +536 } +537 +538 if (charAt(value, index + 1) == 'J') { +539 index += 2; +540 } else { +541 index++; +542 } +543 } +544 return index; +545 } +546 +547 /** +548 * Handles 'L' cases. 
+549 */ +550 private int handleL(final String value, final DoubleMetaphoneResult result, int index) { +551 if (charAt(value, index + 1) == 'L') { +552 if (conditionL0(value, index)) { +553 result.appendPrimary('L'); +554 } else { +555 result.append('L'); +556 } +557 index += 2; +558 } else { +559 index++; +560 result.append('L'); +561 } +562 return index; +563 } +564 +565 /** +566 * Handles 'P' cases. +567 */ +568 private int handleP(final String value, final DoubleMetaphoneResult result, int index) { +569 if (charAt(value, index + 1) == 'H') { +570 result.append('F'); +571 index += 2; +572 } else { +573 result.append('P'); +574 index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1; +575 } +576 return index; +577 } +578 +579 /** +580 * Handles 'R' cases. +581 */ +582 private int handleR(final String value, final DoubleMetaphoneResult result, final int index, +583 final boolean slavoGermanic) { +584 if (index == value.length() - 1 && !slavoGermanic && +585 contains(value, index - 2, 2, "IE") && +586 !contains(value, index - 4, 2, "ME", "MA")) { +587 result.appendAlternate('R'); +588 } else { +589 result.append('R'); +590 } +591 return charAt(value, index + 1) == 'R' ? index + 2 : index + 1; +592 } +593 +594 /** +595 * Handles 'S' cases. 
+596 */ +597 private int handleS(final String value, final DoubleMetaphoneResult result, int index, +598 final boolean slavoGermanic) { +599 if (contains(value, index - 1, 3, "ISL", "YSL")) { +600 //-- special cases "island", "isle", "carlisle", "carlysle" --// +601 index++; +602 } else if (index == 0 && contains(value, index, 5, "SUGAR")) { +603 //-- special case "sugar-" --// +604 result.append('X', 'S'); +605 index++; +606 } else if (contains(value, index, 2, "SH")) { +607 if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) { +608 //-- germanic --// +609 result.append('S'); +610 } else { +611 result.append('X'); +612 } +613 index += 2; +614 } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) { +615 //-- Italian and Armenian --// +616 if (slavoGermanic) { +617 result.append('S'); +618 } else { +619 result.append('S', 'X'); +620 } +621 index += 3; +622 } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || +623 contains(value, index + 1, 1, "Z")) { +624 //-- german & anglicisations, e.g. "smith" match "schmidt" // +625 // "snider" match "schneider" --// +626 //-- also, -sz- in slavic language although in hungarian it // +627 // is pronounced "s" --// +628 result.append('S', 'X'); +629 index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1; +630 } else if (contains(value, index, 2, "SC")) { +631 index = handleSC(value, result, index); +632 } else { +633 if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) { +634 //-- french e.g. "resnais", "artois" --// +635 result.appendAlternate('S'); +636 } else { +637 result.append('S'); +638 } +639 index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1; +640 } +641 return index; +642 } +643 +644 /** +645 * Handles 'SC' cases. 
+646 */ +647 private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) { +648 if (charAt(value, index + 2) == 'H') { +649 //-- Schlesinger's rule --// +650 if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) { +651 //-- Dutch origin, e.g. "school", "schooner" --// +652 if (contains(value, index + 3, 2, "ER", "EN")) { +653 //-- "schermerhorn", "schenker" --// +654 result.append("X", "SK"); +655 } else { +656 result.append("SK"); +657 } +658 } else { +659 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') { +660 result.append('X', 'S'); +661 } else { +662 result.append('X'); +663 } +664 } +665 } else if (contains(value, index + 2, 1, "I", "E", "Y")) { +666 result.append('S'); +667 } else { +668 result.append("SK"); +669 } +670 return index + 3; +671 } +672 +673 /** +674 * Handles 'T' cases. +675 */ +676 private int handleT(final String value, final DoubleMetaphoneResult result, int index) { +677 if (contains(value, index, 4, "TION")) { +678 result.append('X'); +679 index += 3; +680 } else if (contains(value, index, 3, "TIA", "TCH")) { +681 result.append('X'); +682 index += 3; +683 } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) { +684 if (contains(value, index + 2, 2, "OM", "AM") || +685 //-- special case "thomas", "thames" or germanic --// +686 contains(value, 0, 4, "VAN ", "VON ") || +687 contains(value, 0, 3, "SCH")) { +688 result.append('T'); +689 } else { +690 result.append('0', 'T'); +691 } +692 index += 2; +693 } else { +694 result.append('T'); +695 index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1; +696 } +697 return index; +698 } +699 +700 /** +701 * Handles 'W' cases. 
+702 */ +703 private int handleW(final String value, final DoubleMetaphoneResult result, int index) { +704 if (contains(value, index, 2, "WR")) { +705 //-- can also be in middle of word --// +706 result.append('R'); +707 index += 2; +708 } else { +709 if (index == 0 && (isVowel(charAt(value, index + 1)) || +710 contains(value, index, 2, "WH"))) { +711 if (isVowel(charAt(value, index + 1))) { +712 //-- Wasserman should match Vasserman --// +713 result.append('A', 'F'); +714 } else { +715 //-- need Uomo to match Womo --// +716 result.append('A'); +717 } +718 index++; +719 } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) || +720 contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") || +721 contains(value, 0, 3, "SCH")) { +722 //-- Arnow should match Arnoff --// +723 result.appendAlternate('F'); +724 index++; +725 } else if (contains(value, index, 4, "WICZ", "WITZ")) { +726 //-- Polish e.g. "filipowicz" --// +727 result.append("TS", "FX"); +728 index += 4; +729 } else { +730 index++; +731 } +732 } +733 return index; +734 } +735 +736 /** +737 * Handles 'X' cases. +738 */ +739 private int handleX(final String value, final DoubleMetaphoneResult result, int index) { +740 if (index == 0) { +741 result.append('S'); +742 index++; +743 } else { +744 if (!((index == value.length() - 1) && +745 (contains(value, index - 3, 3, "IAU", "EAU") || +746 contains(value, index - 2, 2, "AU", "OU")))) { +747 //-- French e.g. breaux --// +748 result.append("KS"); +749 } +750 index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1; +751 } +752 return index; +753 } +754 +755 /** +756 * Handles 'Z' cases. +757 */ +758 private int handleZ(final String value, final DoubleMetaphoneResult result, int index, +759 final boolean slavoGermanic) { +760 if (charAt(value, index + 1) == 'H') { +761 //-- Chinese pinyin e.g. 
"zhao" or Angelina "Zhang" --// +762 result.append('J'); +763 index += 2; +764 } else { +765 if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || +766 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) { +767 result.append("S", "TS"); +768 } else { +769 result.append('S'); +770 } +771 index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1; +772 } +773 return index; +774 } +775 +776 //-- BEGIN CONDITIONS --// +777 +778 /** +779 * Complex condition 0 for 'C'. +780 */ +781 private boolean conditionC0(final String value, final int index) { +782 if (contains(value, index, 4, "CHIA")) { +783 return true; +784 } else if (index <= 1) { +785 return false; +786 } else if (isVowel(charAt(value, index - 2))) { +787 return false; +788 } else if (!contains(value, index - 1, 3, "ACH")) { +789 return false; +790 } else { +791 final char c = charAt(value, index + 2); +792 return (c != 'I' && c != 'E') || +793 contains(value, index - 2, 6, "BACHER", "MACHER"); +794 } +795 } +796 +797 /** +798 * Complex condition 0 for 'CH'. +799 */ +800 private boolean conditionCH0(final String value, final int index) { +801 if (index != 0) { +802 return false; +803 } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && +804 !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) { +805 return false; +806 } else if (contains(value, 0, 5, "CHORE")) { +807 return false; +808 } else { +809 return true; +810 } +811 } +812 +813 /** +814 * Complex condition 1 for 'CH'. 
+815 */ +816 private boolean conditionCH1(final String value, final int index) { +817 return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || +818 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") || +819 contains(value, index + 2, 1, "T", "S") || +820 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) && +821 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1))); +822 } +823 +824 /** +825 * Complex condition 0 for 'L'. +826 */ +827 private boolean conditionL0(final String value, final int index) { +828 if (index == value.length() - 3 && +829 contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) { +830 return true; +831 } else if ((contains(value, value.length() - 2, 2, "AS", "OS") || +832 contains(value, value.length() - 1, 1, "A", "O")) && +833 contains(value, index - 1, 4, "ALLE")) { +834 return true; +835 } else { +836 return false; +837 } +838 } +839 +840 /** +841 * Complex condition 0 for 'M'. +842 */ +843 private boolean conditionM0(final String value, final int index) { +844 if (charAt(value, index + 1) == 'M') { +845 return true; +846 } +847 return contains(value, index - 1, 3, "UMB") && +848 ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER")); +849 } +850 +851 //-- BEGIN HELPER FUNCTIONS --// +852 +853 /** +854 * Determines whether or not a value is of slavo-germanic origin. A value is +855 * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'. +856 */ +857 private boolean isSlavoGermanic(final String value) { +858 return value.indexOf('W') > -1 || value.indexOf('K') > -1 || +859 value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1; +860 } +861 +862 /** +863 * Determines whether or not a character is a vowel or not +864 */ +865 private boolean isVowel(final char ch) { +866 return VOWELS.indexOf(ch) != -1; +867 } +868 +869 /** +870 * Determines whether or not the value starts with a silent letter. 
It will +871 * return <code>true</code> if the value starts with any of 'GN', 'KN', +872 * 'PN', 'WR' or 'PS'. +873 */ +874 private boolean isSilentStart(final String value) { +875 boolean result = false; +876 for (final String element : SILENT_START) { +877 if (value.startsWith(element)) { +878 result = true; +879 break; +880 } +881 } +882 return result; +883 } +884 +885 /** +886 * Cleans the input. +887 */ +888 private String cleanInput(String input) { +889 if (input == null) { +890 return null; +891 } +892 input = input.trim(); +893 if (input.length() == 0) { +894 return null; +895 } +896 return input.toUpperCase(java.util.Locale.ENGLISH); +897 } +898 +899 /* +900 * Gets the character at index <code>index</code> if available, otherwise +901 * it returns <code>Character.MIN_VALUE</code> so that there is some sort +902 * of a default. +903 */ +904 protected char charAt(final String value, final int index) { +905 if (index < 0 || index >= value.length()) { +906 return Character.MIN_VALUE; +907 } +908 return value.charAt(index); +909 } +910 +911 /* +912 * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and +913 * matching up to length <code>length</code>. +914 */ +915 protected static boolean contains(final String value, final int start, final int length, +916 final String... criteria) { +917 boolean result = false; +918 if (start >= 0 && start + length <= value.length()) { +919 final String target = value.substring(start, start + length); +920 +921 for (final String element : criteria) { +922 if (target.equals(element)) { +923 result = true; +924 break; +925 } +926 } +927 } +928 return result; +929 } +930 +931 //-- BEGIN INNER CLASSES --// +932 +933 /** +934 * Inner class for storing results, since there is the optional alternate encoding. 
+935 */ +936 public class DoubleMetaphoneResult { +937 +938 private final StringBuilder primary = new StringBuilder(getMaxCodeLen()); +939 private final StringBuilder alternate = new StringBuilder(getMaxCodeLen()); +940 private final int maxLength; +941 +942 public DoubleMetaphoneResult(final int maxLength) { +943 this.maxLength = maxLength; +944 } +945 +946 public void append(final char value) { +947 appendPrimary(value); +948 appendAlternate(value); +949 } +950 +951 public void append(final char primary, final char alternate) { +952 appendPrimary(primary); +953 appendAlternate(alternate); +954 } +955 +956 public void appendPrimary(final char value) { +957 if (this.primary.length() < this.maxLength) { +958 this.primary.append(value); +959 } +960 } +961 +962 public void appendAlternate(final char value) { +963 if (this.alternate.length() < this.maxLength) { +964 this.alternate.append(value); +965 } +966 } +967 +968 public void append(final String value) { +969 appendPrimary(value); +970 appendAlternate(value); +971 } +972 +973 public void append(final String primary, final String alternate) { +974 appendPrimary(primary); +975 appendAlternate(alternate); +976 } +977 +978 public void appendPrimary(final String value) { +979 final int addChars = this.maxLength - this.primary.length(); +980 if (value.length() <= addChars) { +981 this.primary.append(value); +982 } else { +983 this.primary.append(value.substring(0, addChars)); +984 } +985 } +986 +987 public void appendAlternate(final String value) { +988 final int addChars = this.maxLength - this.alternate.length(); +989 if (value.length() <= addChars) { +990 this.alternate.append(value); +991 } else { +992 this.alternate.append(value.substring(0, addChars)); +993 } +994 } +995 +996 public String getPrimary() { +997 return this.primary.toString(); +998 } +999 +1000 public String getAlternate() { +1001 return this.alternate.toString(); +1002 } +1003 +1004 public boolean isComplete() { +1005 return this.primary.length() >= 
this.maxLength && +1006 this.alternate.length() >= this.maxLength; +1007 } +1008 } +1009} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022import org.apache.commons.codec.binary.StringUtils; +023 +024/** +025 * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence +026 * Philips</CITE>. +027 * <p> +028 * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable +029 * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is +030 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication +031 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup. 
+032 * +033 * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a> +034 * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a> +035 * +036 * @version $Id$ +037 */ +038public class DoubleMetaphone implements StringEncoder { +039 +040 /** +041 * "Vowels" to test for +042 */ +043 private static final String VOWELS = "AEIOUY"; +044 +045 /** +046 * Prefixes when present which are not pronounced +047 */ +048 private static final String[] SILENT_START = +049 { "GN", "KN", "PN", "WR", "PS" }; +050 private static final String[] L_R_N_M_B_H_F_V_W_SPACE = +051 { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " }; +052 private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = +053 { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" }; +054 private static final String[] L_T_K_S_N_M_B_Z = +055 { "L", "T", "K", "S", "N", "M", "B", "Z" }; +056 +057 /** +058 * Maximum length of an encoding, default is 4 +059 */ +060 private int maxCodeLen = 4; +061 +062 /** +063 * Creates an instance of this DoubleMetaphone encoder +064 */ +065 public DoubleMetaphone() { +066 super(); +067 } +068 +069 /** +070 * Encode a value with Double Metaphone. +071 * +072 * @param value String to encode +073 * @return an encoded string +074 */ +075 public String doubleMetaphone(final String value) { +076 return doubleMetaphone(value, false); +077 } +078 +079 /** +080 * Encode a value with Double Metaphone, optionally using the alternate encoding. +081 * +082 * @param value String to encode +083 * @param alternate use alternate encode +084 * @return an encoded string +085 */ +086 public String doubleMetaphone(String value, final boolean alternate) { +087 value = cleanInput(value); +088 if (value == null) { +089 return null; +090 } +091 +092 final boolean slavoGermanic = isSlavoGermanic(value); +093 int index = isSilentStart(value) ? 
1 : 0; +094 +095 final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen()); +096 +097 while (!result.isComplete() && index <= value.length() - 1) { +098 switch (value.charAt(index)) { +099 case 'A': +100 case 'E': +101 case 'I': +102 case 'O': +103 case 'U': +104 case 'Y': +105 index = handleAEIOUY(result, index); +106 break; +107 case 'B': +108 result.append('P'); +109 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1; +110 break; +111 case '\u00C7': +112 // A C with a Cedilla +113 result.append('S'); +114 index++; +115 break; +116 case 'C': +117 index = handleC(value, result, index); +118 break; +119 case 'D': +120 index = handleD(value, result, index); +121 break; +122 case 'F': +123 result.append('F'); +124 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1; +125 break; +126 case 'G': +127 index = handleG(value, result, index, slavoGermanic); +128 break; +129 case 'H': +130 index = handleH(value, result, index); +131 break; +132 case 'J': +133 index = handleJ(value, result, index, slavoGermanic); +134 break; +135 case 'K': +136 result.append('K'); +137 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1; +138 break; +139 case 'L': +140 index = handleL(value, result, index); +141 break; +142 case 'M': +143 result.append('M'); +144 index = conditionM0(value, index) ? index + 2 : index + 1; +145 break; +146 case 'N': +147 result.append('N'); +148 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1; +149 break; +150 case '\u00D1': +151 // N with a tilde (spanish ene) +152 result.append('N'); +153 index++; +154 break; +155 case 'P': +156 index = handleP(value, result, index); +157 break; +158 case 'Q': +159 result.append('K'); +160 index = charAt(value, index + 1) == 'Q' ? 
index + 2 : index + 1; +161 break; +162 case 'R': +163 index = handleR(value, result, index, slavoGermanic); +164 break; +165 case 'S': +166 index = handleS(value, result, index, slavoGermanic); +167 break; +168 case 'T': +169 index = handleT(value, result, index); +170 break; +171 case 'V': +172 result.append('F'); +173 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1; +174 break; +175 case 'W': +176 index = handleW(value, result, index); +177 break; +178 case 'X': +179 index = handleX(value, result, index); +180 break; +181 case 'Z': +182 index = handleZ(value, result, index, slavoGermanic); +183 break; +184 default: +185 index++; +186 break; +187 } +188 } +189 +190 return alternate ? result.getAlternate() : result.getPrimary(); +191 } +192 +193 /** +194 * Encode the value using DoubleMetaphone. It will only work if +195 * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>). +196 * +197 * @param obj Object to encode (should be of type String) +198 * @return An encoded Object (will be of type String) +199 * @throws EncoderException encode parameter is not of type String +200 */ +201 @Override +202 public Object encode(final Object obj) throws EncoderException { +203 if (!(obj instanceof String)) { +204 throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); +205 } +206 return doubleMetaphone((String) obj); +207 } +208 +209 /** +210 * Encode the value using DoubleMetaphone. +211 * +212 * @param value String to encode +213 * @return An encoded String +214 */ +215 @Override +216 public String encode(final String value) { +217 return doubleMetaphone(value); +218 } +219 +220 /** +221 * Check if the Double Metaphone values of two <code>String</code> values +222 * are equal. +223 * +224 * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. +225 * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. 
+226 * @return <code>true</code> if the encoded <code>String</code>s are equal; +227 * <code>false</code> otherwise. +228 * @see #isDoubleMetaphoneEqual(String,String,boolean) +229 */ +230 public boolean isDoubleMetaphoneEqual(final String value1, final String value2) { +231 return isDoubleMetaphoneEqual(value1, value2, false); +232 } +233 +234 /** +235 * Check if the Double Metaphone values of two <code>String</code> values +236 * are equal, optionally using the alternate value. +237 * +238 * @param value1 The left-hand side of the encoded {@link String#equals(Object)}. +239 * @param value2 The right-hand side of the encoded {@link String#equals(Object)}. +240 * @param alternate use the alternate value if <code>true</code>. +241 * @return <code>true</code> if the encoded <code>String</code>s are equal; +242 * <code>false</code> otherwise. +243 */ +244 public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) { +245 return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate)); +246 } +247 +248 /** +249 * Returns the maxCodeLen. +250 * @return int +251 */ +252 public int getMaxCodeLen() { +253 return this.maxCodeLen; +254 } +255 +256 /** +257 * Sets the maxCodeLen. +258 * @param maxCodeLen The maxCodeLen to set +259 */ +260 public void setMaxCodeLen(final int maxCodeLen) { +261 this.maxCodeLen = maxCodeLen; +262 } +263 +264 //-- BEGIN HANDLERS --// +265 +266 /** +267 * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases. +268 */ +269 private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) { +270 if (index == 0) { +271 result.append('A'); +272 } +273 return index + 1; +274 } +275 +276 /** +277 * Handles 'C' cases. 
+278 */ +279 private int handleC(final String value, final DoubleMetaphoneResult result, int index) { +280 if (conditionC0(value, index)) { // very confusing, moved out +281 result.append('K'); +282 index += 2; +283 } else if (index == 0 && contains(value, index, 6, "CAESAR")) { +284 result.append('S'); +285 index += 2; +286 } else if (contains(value, index, 2, "CH")) { +287 index = handleCH(value, result, index); +288 } else if (contains(value, index, 2, "CZ") && +289 !contains(value, index - 2, 4, "WICZ")) { +290 //-- "Czerny" --// +291 result.append('S', 'X'); +292 index += 2; +293 } else if (contains(value, index + 1, 3, "CIA")) { +294 //-- "focaccia" --// +295 result.append('X'); +296 index += 3; +297 } else if (contains(value, index, 2, "CC") && +298 !(index == 1 && charAt(value, 0) == 'M')) { +299 //-- double "cc" but not "McClelland" --// +300 return handleCC(value, result, index); +301 } else if (contains(value, index, 2, "CK", "CG", "CQ")) { +302 result.append('K'); +303 index += 2; +304 } else if (contains(value, index, 2, "CI", "CE", "CY")) { +305 //-- Italian vs. English --// +306 if (contains(value, index, 3, "CIO", "CIE", "CIA")) { +307 result.append('S', 'X'); +308 } else { +309 result.append('S'); +310 } +311 index += 2; +312 } else { +313 result.append('K'); +314 if (contains(value, index + 1, 2, " C", " Q", " G")) { +315 //-- Mac Caffrey, Mac Gregor --// +316 index += 3; +317 } else if (contains(value, index + 1, 1, "C", "K", "Q") && +318 !contains(value, index + 1, 2, "CE", "CI")) { +319 index += 2; +320 } else { +321 index++; +322 } +323 } +324 +325 return index; +326 } +327 +328 /** +329 * Handles 'CC' cases. 
+330 */ +331 private int handleCC(final String value, final DoubleMetaphoneResult result, int index) { +332 if (contains(value, index + 2, 1, "I", "E", "H") && +333 !contains(value, index + 2, 2, "HU")) { +334 //-- "bellocchio" but not "bacchus" --// +335 if ((index == 1 && charAt(value, index - 1) == 'A') || +336 contains(value, index - 1, 5, "UCCEE", "UCCES")) { +337 //-- "accident", "accede", "succeed" --// +338 result.append("KS"); +339 } else { +340 //-- "bacci", "bertucci", other Italian --// +341 result.append('X'); +342 } +343 index += 3; +344 } else { // Pierce's rule +345 result.append('K'); +346 index += 2; +347 } +348 +349 return index; +350 } +351 +352 /** +353 * Handles 'CH' cases. +354 */ +355 private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) { +356 if (index > 0 && contains(value, index, 4, "CHAE")) { // Michael +357 result.append('K', 'X'); +358 return index + 2; +359 } else if (conditionCH0(value, index)) { +360 //-- Greek roots ("chemistry", "chorus", etc.) --// +361 result.append('K'); +362 return index + 2; +363 } else if (conditionCH1(value, index)) { +364 //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --// +365 result.append('K'); +366 return index + 2; +367 } else { +368 if (index > 0) { +369 if (contains(value, 0, 2, "MC")) { +370 result.append('K'); +371 } else { +372 result.append('X', 'K'); +373 } +374 } else { +375 result.append('X'); +376 } +377 return index + 2; +378 } +379 } +380 +381 /** +382 * Handles 'D' cases. 
+383 */ +384 private int handleD(final String value, final DoubleMetaphoneResult result, int index) { +385 if (contains(value, index, 2, "DG")) { +386 //-- "Edge" --// +387 if (contains(value, index + 2, 1, "I", "E", "Y")) { +388 result.append('J'); +389 index += 3; +390 //-- "Edgar" --// +391 } else { +392 result.append("TK"); +393 index += 2; +394 } +395 } else if (contains(value, index, 2, "DT", "DD")) { +396 result.append('T'); +397 index += 2; +398 } else { +399 result.append('T'); +400 index++; +401 } +402 return index; +403 } +404 +405 /** +406 * Handles 'G' cases. +407 */ +408 private int handleG(final String value, final DoubleMetaphoneResult result, int index, +409 final boolean slavoGermanic) { +410 if (charAt(value, index + 1) == 'H') { +411 index = handleGH(value, result, index); +412 } else if (charAt(value, index + 1) == 'N') { +413 if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) { +414 result.append("KN", "N"); +415 } else if (!contains(value, index + 2, 2, "EY") && +416 charAt(value, index + 1) != 'Y' && !slavoGermanic) { +417 result.append("N", "KN"); +418 } else { +419 result.append("KN"); +420 } +421 index = index + 2; +422 } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) { +423 result.append("KL", "L"); +424 index += 2; +425 } else if (index == 0 && +426 (charAt(value, index + 1) == 'Y' || +427 contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) { +428 //-- -ges-, -gep-, -gel-, -gie- at beginning --// +429 result.append('K', 'J'); +430 index += 2; +431 } else if ((contains(value, index + 1, 2, "ER") || +432 charAt(value, index + 1) == 'Y') && +433 !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") && +434 !contains(value, index - 1, 1, "E", "I") && +435 !contains(value, index - 1, 3, "RGY", "OGY")) { +436 //-- -ger-, -gy- --// +437 result.append('K', 'J'); +438 index += 2; +439 } else if (contains(value, index + 1, 1, "E", "I", "Y") || +440 contains(value, index - 1, 4, "AGGI", "OGGI")) { +441 
//-- Italian "biaggi" --// +442 if (contains(value, 0 ,4, "VAN ", "VON ") || +443 contains(value, 0, 3, "SCH") || +444 contains(value, index + 1, 2, "ET")) { +445 //-- obvious germanic --// +446 result.append('K'); +447 } else if (contains(value, index + 1, 3, "IER")) { +448 result.append('J'); +449 } else { +450 result.append('J', 'K'); +451 } +452 index += 2; +453 } else if (charAt(value, index + 1) == 'G') { +454 index += 2; +455 result.append('K'); +456 } else { +457 index++; +458 result.append('K'); +459 } +460 return index; +461 } +462 +463 /** +464 * Handles 'GH' cases. +465 */ +466 private int handleGH(final String value, final DoubleMetaphoneResult result, int index) { +467 if (index > 0 && !isVowel(charAt(value, index - 1))) { +468 result.append('K'); +469 index += 2; +470 } else if (index == 0) { +471 if (charAt(value, index + 2) == 'I') { +472 result.append('J'); +473 } else { +474 result.append('K'); +475 } +476 index += 2; +477 } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) || +478 (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) || +479 (index > 3 && contains(value, index - 4, 1, "B", "H"))) { +480 //-- Parker's rule (with some further refinements) - "hugh" +481 index += 2; +482 } else { +483 if (index > 2 && charAt(value, index - 1) == 'U' && +484 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) { +485 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough" +486 result.append('F'); +487 } else if (index > 0 && charAt(value, index - 1) != 'I') { +488 result.append('K'); +489 } +490 index += 2; +491 } +492 return index; +493 } +494 +495 /** +496 * Handles 'H' cases. 
+497 */ +498 private int handleH(final String value, final DoubleMetaphoneResult result, int index) { +499 //-- only keep if first & before vowel or between 2 vowels --// +500 if ((index == 0 || isVowel(charAt(value, index - 1))) && +501 isVowel(charAt(value, index + 1))) { +502 result.append('H'); +503 index += 2; +504 //-- also takes car of "HH" --// +505 } else { +506 index++; +507 } +508 return index; +509 } +510 +511 /** +512 * Handles 'J' cases. +513 */ +514 private int handleJ(final String value, final DoubleMetaphoneResult result, int index, +515 final boolean slavoGermanic) { +516 if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) { +517 //-- obvious Spanish, "Jose", "San Jacinto" --// +518 if ((index == 0 && (charAt(value, index + 4) == ' ') || +519 value.length() == 4) || contains(value, 0, 4, "SAN ")) { +520 result.append('H'); +521 } else { +522 result.append('J', 'H'); +523 } +524 index++; +525 } else { +526 if (index == 0 && !contains(value, index, 4, "JOSE")) { +527 result.append('J', 'A'); +528 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && +529 (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) { +530 result.append('J', 'H'); +531 } else if (index == value.length() - 1) { +532 result.append('J', ' '); +533 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && +534 !contains(value, index - 1, 1, "S", "K", "L")) { +535 result.append('J'); +536 } +537 +538 if (charAt(value, index + 1) == 'J') { +539 index += 2; +540 } else { +541 index++; +542 } +543 } +544 return index; +545 } +546 +547 /** +548 * Handles 'L' cases. 
+549 */ +550 private int handleL(final String value, final DoubleMetaphoneResult result, int index) { +551 if (charAt(value, index + 1) == 'L') { +552 if (conditionL0(value, index)) { +553 result.appendPrimary('L'); +554 } else { +555 result.append('L'); +556 } +557 index += 2; +558 } else { +559 index++; +560 result.append('L'); +561 } +562 return index; +563 } +564 +565 /** +566 * Handles 'P' cases. +567 */ +568 private int handleP(final String value, final DoubleMetaphoneResult result, int index) { +569 if (charAt(value, index + 1) == 'H') { +570 result.append('F'); +571 index += 2; +572 } else { +573 result.append('P'); +574 index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1; +575 } +576 return index; +577 } +578 +579 /** +580 * Handles 'R' cases. +581 */ +582 private int handleR(final String value, final DoubleMetaphoneResult result, final int index, +583 final boolean slavoGermanic) { +584 if (index == value.length() - 1 && !slavoGermanic && +585 contains(value, index - 2, 2, "IE") && +586 !contains(value, index - 4, 2, "ME", "MA")) { +587 result.appendAlternate('R'); +588 } else { +589 result.append('R'); +590 } +591 return charAt(value, index + 1) == 'R' ? index + 2 : index + 1; +592 } +593 +594 /** +595 * Handles 'S' cases. 
+596 */ +597 private int handleS(final String value, final DoubleMetaphoneResult result, int index, +598 final boolean slavoGermanic) { +599 if (contains(value, index - 1, 3, "ISL", "YSL")) { +600 //-- special cases "island", "isle", "carlisle", "carlysle" --// +601 index++; +602 } else if (index == 0 && contains(value, index, 5, "SUGAR")) { +603 //-- special case "sugar-" --// +604 result.append('X', 'S'); +605 index++; +606 } else if (contains(value, index, 2, "SH")) { +607 if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) { +608 //-- germanic --// +609 result.append('S'); +610 } else { +611 result.append('X'); +612 } +613 index += 2; +614 } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) { +615 //-- Italian and Armenian --// +616 if (slavoGermanic) { +617 result.append('S'); +618 } else { +619 result.append('S', 'X'); +620 } +621 index += 3; +622 } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || +623 contains(value, index + 1, 1, "Z")) { +624 //-- german & anglicisations, e.g. "smith" match "schmidt" // +625 // "snider" match "schneider" --// +626 //-- also, -sz- in slavic language although in hungarian it // +627 // is pronounced "s" --// +628 result.append('S', 'X'); +629 index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1; +630 } else if (contains(value, index, 2, "SC")) { +631 index = handleSC(value, result, index); +632 } else { +633 if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) { +634 //-- french e.g. "resnais", "artois" --// +635 result.appendAlternate('S'); +636 } else { +637 result.append('S'); +638 } +639 index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1; +640 } +641 return index; +642 } +643 +644 /** +645 * Handles 'SC' cases. 
+646 */ +647 private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) { +648 if (charAt(value, index + 2) == 'H') { +649 //-- Schlesinger's rule --// +650 if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) { +651 //-- Dutch origin, e.g. "school", "schooner" --// +652 if (contains(value, index + 3, 2, "ER", "EN")) { +653 //-- "schermerhorn", "schenker" --// +654 result.append("X", "SK"); +655 } else { +656 result.append("SK"); +657 } +658 } else { +659 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') { +660 result.append('X', 'S'); +661 } else { +662 result.append('X'); +663 } +664 } +665 } else if (contains(value, index + 2, 1, "I", "E", "Y")) { +666 result.append('S'); +667 } else { +668 result.append("SK"); +669 } +670 return index + 3; +671 } +672 +673 /** +674 * Handles 'T' cases. +675 */ +676 private int handleT(final String value, final DoubleMetaphoneResult result, int index) { +677 if (contains(value, index, 4, "TION")) { +678 result.append('X'); +679 index += 3; +680 } else if (contains(value, index, 3, "TIA", "TCH")) { +681 result.append('X'); +682 index += 3; +683 } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) { +684 if (contains(value, index + 2, 2, "OM", "AM") || +685 //-- special case "thomas", "thames" or germanic --// +686 contains(value, 0, 4, "VAN ", "VON ") || +687 contains(value, 0, 3, "SCH")) { +688 result.append('T'); +689 } else { +690 result.append('0', 'T'); +691 } +692 index += 2; +693 } else { +694 result.append('T'); +695 index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1; +696 } +697 return index; +698 } +699 +700 /** +701 * Handles 'W' cases. 
+702 */ +703 private int handleW(final String value, final DoubleMetaphoneResult result, int index) { +704 if (contains(value, index, 2, "WR")) { +705 //-- can also be in middle of word --// +706 result.append('R'); +707 index += 2; +708 } else { +709 if (index == 0 && (isVowel(charAt(value, index + 1)) || +710 contains(value, index, 2, "WH"))) { +711 if (isVowel(charAt(value, index + 1))) { +712 //-- Wasserman should match Vasserman --// +713 result.append('A', 'F'); +714 } else { +715 //-- need Uomo to match Womo --// +716 result.append('A'); +717 } +718 index++; +719 } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) || +720 contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") || +721 contains(value, 0, 3, "SCH")) { +722 //-- Arnow should match Arnoff --// +723 result.appendAlternate('F'); +724 index++; +725 } else if (contains(value, index, 4, "WICZ", "WITZ")) { +726 //-- Polish e.g. "filipowicz" --// +727 result.append("TS", "FX"); +728 index += 4; +729 } else { +730 index++; +731 } +732 } +733 return index; +734 } +735 +736 /** +737 * Handles 'X' cases. +738 */ +739 private int handleX(final String value, final DoubleMetaphoneResult result, int index) { +740 if (index == 0) { +741 result.append('S'); +742 index++; +743 } else { +744 if (!((index == value.length() - 1) && +745 (contains(value, index - 3, 3, "IAU", "EAU") || +746 contains(value, index - 2, 2, "AU", "OU")))) { +747 //-- French e.g. breaux --// +748 result.append("KS"); +749 } +750 index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1; +751 } +752 return index; +753 } +754 +755 /** +756 * Handles 'Z' cases. +757 */ +758 private int handleZ(final String value, final DoubleMetaphoneResult result, int index, +759 final boolean slavoGermanic) { +760 if (charAt(value, index + 1) == 'H') { +761 //-- Chinese pinyin e.g. 
"zhao" or Angelina "Zhang" --// +762 result.append('J'); +763 index += 2; +764 } else { +765 if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || +766 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) { +767 result.append("S", "TS"); +768 } else { +769 result.append('S'); +770 } +771 index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1; +772 } +773 return index; +774 } +775 +776 //-- BEGIN CONDITIONS --// +777 +778 /** +779 * Complex condition 0 for 'C'. +780 */ +781 private boolean conditionC0(final String value, final int index) { +782 if (contains(value, index, 4, "CHIA")) { +783 return true; +784 } else if (index <= 1) { +785 return false; +786 } else if (isVowel(charAt(value, index - 2))) { +787 return false; +788 } else if (!contains(value, index - 1, 3, "ACH")) { +789 return false; +790 } else { +791 final char c = charAt(value, index + 2); +792 return (c != 'I' && c != 'E') || +793 contains(value, index - 2, 6, "BACHER", "MACHER"); +794 } +795 } +796 +797 /** +798 * Complex condition 0 for 'CH'. +799 */ +800 private boolean conditionCH0(final String value, final int index) { +801 if (index != 0) { +802 return false; +803 } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && +804 !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) { +805 return false; +806 } else if (contains(value, 0, 5, "CHORE")) { +807 return false; +808 } else { +809 return true; +810 } +811 } +812 +813 /** +814 * Complex condition 1 for 'CH'. 
+815 */ +816 private boolean conditionCH1(final String value, final int index) { +817 return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || +818 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") || +819 contains(value, index + 2, 1, "T", "S") || +820 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) && +821 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1))); +822 } +823 +824 /** +825 * Complex condition 0 for 'L'. +826 */ +827 private boolean conditionL0(final String value, final int index) { +828 if (index == value.length() - 3 && +829 contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) { +830 return true; +831 } else if ((contains(value, value.length() - 2, 2, "AS", "OS") || +832 contains(value, value.length() - 1, 1, "A", "O")) && +833 contains(value, index - 1, 4, "ALLE")) { +834 return true; +835 } else { +836 return false; +837 } +838 } +839 +840 /** +841 * Complex condition 0 for 'M'. +842 */ +843 private boolean conditionM0(final String value, final int index) { +844 if (charAt(value, index + 1) == 'M') { +845 return true; +846 } +847 return contains(value, index - 1, 3, "UMB") && +848 ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER")); +849 } +850 +851 //-- BEGIN HELPER FUNCTIONS --// +852 +853 /** +854 * Determines whether or not a value is of slavo-germanic origin. A value is +855 * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'. +856 */ +857 private boolean isSlavoGermanic(final String value) { +858 return value.indexOf('W') > -1 || value.indexOf('K') > -1 || +859 value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1; +860 } +861 +862 /** +863 * Determines whether or not a character is a vowel or not +864 */ +865 private boolean isVowel(final char ch) { +866 return VOWELS.indexOf(ch) != -1; +867 } +868 +869 /** +870 * Determines whether or not the value starts with a silent letter. 
It will +871 * return <code>true</code> if the value starts with any of 'GN', 'KN', +872 * 'PN', 'WR' or 'PS'. +873 */ +874 private boolean isSilentStart(final String value) { +875 boolean result = false; +876 for (final String element : SILENT_START) { +877 if (value.startsWith(element)) { +878 result = true; +879 break; +880 } +881 } +882 return result; +883 } +884 +885 /** +886 * Cleans the input. +887 */ +888 private String cleanInput(String input) { +889 if (input == null) { +890 return null; +891 } +892 input = input.trim(); +893 if (input.length() == 0) { +894 return null; +895 } +896 return input.toUpperCase(java.util.Locale.ENGLISH); +897 } +898 +899 /* +900 * Gets the character at index <code>index</code> if available, otherwise +901 * it returns <code>Character.MIN_VALUE</code> so that there is some sort +902 * of a default. +903 */ +904 protected char charAt(final String value, final int index) { +905 if (index < 0 || index >= value.length()) { +906 return Character.MIN_VALUE; +907 } +908 return value.charAt(index); +909 } +910 +911 /* +912 * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and +913 * matching up to length <code>length</code>. +914 */ +915 protected static boolean contains(final String value, final int start, final int length, +916 final String... criteria) { +917 boolean result = false; +918 if (start >= 0 && start + length <= value.length()) { +919 final String target = value.substring(start, start + length); +920 +921 for (final String element : criteria) { +922 if (target.equals(element)) { +923 result = true; +924 break; +925 } +926 } +927 } +928 return result; +929 } +930 +931 //-- BEGIN INNER CLASSES --// +932 +933 /** +934 * Inner class for storing results, since there is the optional alternate encoding. 
+935 */ +936 public class DoubleMetaphoneResult { +937 +938 private final StringBuilder primary = new StringBuilder(getMaxCodeLen()); +939 private final StringBuilder alternate = new StringBuilder(getMaxCodeLen()); +940 private final int maxLength; +941 +942 public DoubleMetaphoneResult(final int maxLength) { +943 this.maxLength = maxLength; +944 } +945 +946 public void append(final char value) { +947 appendPrimary(value); +948 appendAlternate(value); +949 } +950 +951 public void append(final char primary, final char alternate) { +952 appendPrimary(primary); +953 appendAlternate(alternate); +954 } +955 +956 public void appendPrimary(final char value) { +957 if (this.primary.length() < this.maxLength) { +958 this.primary.append(value); +959 } +960 } +961 +962 public void appendAlternate(final char value) { +963 if (this.alternate.length() < this.maxLength) { +964 this.alternate.append(value); +965 } +966 } +967 +968 public void append(final String value) { +969 appendPrimary(value); +970 appendAlternate(value); +971 } +972 +973 public void append(final String primary, final String alternate) { +974 appendPrimary(primary); +975 appendAlternate(alternate); +976 } +977 +978 public void appendPrimary(final String value) { +979 final int addChars = this.maxLength - this.primary.length(); +980 if (value.length() <= addChars) { +981 this.primary.append(value); +982 } else { +983 this.primary.append(value.substring(0, addChars)); +984 } +985 } +986 +987 public void appendAlternate(final String value) { +988 final int addChars = this.maxLength - this.alternate.length(); +989 if (value.length() <= addChars) { +990 this.alternate.append(value); +991 } else { +992 this.alternate.append(value.substring(0, addChars)); +993 } +994 } +995 +996 public String getPrimary() { +997 return this.primary.toString(); +998 } +999 +1000 public String getAlternate() { +1001 return this.alternate.toString(); +1002 } +1003 +1004 public boolean isComplete() { +1005 return this.primary.length() >= 
this.maxLength && +1006 this.alternate.length() >= this.maxLength; +1007 } +1008 } +1009} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017package org.apache.commons.codec.language; +018 +019import java.util.Locale; +020 +021import org.apache.commons.codec.EncoderException; +022import org.apache.commons.codec.StringEncoder; +023 +024/** +025 * Match Rating Approach Phonetic Algorithm Developed by <CITE>Western Airlines</CITE> in 1977. +026 * +027 * This class is immutable and thread-safe. +028 * +029 * @see <a href="http://en.wikipedia.org/wiki/Match_rating_approach">Wikipedia - Match Rating Approach</a> +030 * @since 1.8 +031 */ +032public class MatchRatingApproachEncoder implements StringEncoder { +033 +034 private static final String SPACE = " "; +035 +036 private static final String EMPTY = ""; +037 +038 /** +039 * Constants used mainly for the min rating value. +040 */ +041 private static final int ONE = 1, TWO = 2, THREE = 3, FOUR = 4, FIVE = 5, SIX = 6, SEVEN = 7, +042 ELEVEN = 11, TWELVE = 12; +043 +044 /** +045 * The plain letter equivalent of the accented letters. 
+046 */ +047 private static final String PLAIN_ASCII = "AaEeIiOoUu" + // grave +048 "AaEeIiOoUuYy" + // acute +049 "AaEeIiOoUuYy" + // circumflex +050 "AaOoNn" + // tilde +051 "AaEeIiOoUuYy" + // umlaut +052 "Aa" + // ring +053 "Cc" + // cedilla +054 "OoUu"; // double acute +055 +056 /** +057 * Unicode characters corresponding to various accented letters. For example: \u00DA is U acute etc... +058 */ +059 private static final String UNICODE = "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9" + +060 "\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" + +061 "\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" + +062 "\u00C3\u00E3\u00D5\u00F5\u00D1\u00F1" + +063 "\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" + +064 "\u00C5\u00E5" + "\u00C7\u00E7" + "\u0150\u0151\u0170\u0171"; +065 +066 private static final String[] DOUBLE_CONSONANT = +067 new String[] { "BB", "CC", "DD", "FF", "GG", "HH", "JJ", "KK", "LL", "MM", "NN", "PP", "QQ", "RR", "SS", +068 "TT", "VV", "WW", "XX", "YY", "ZZ" }; +069 +070 /** +071 * Cleans up a name: 1. Upper-cases everything 2. Removes some common punctuation 3. Removes accents 4. Removes any +072 * spaces. +073 * +074 * <h2>API Usage</h2> +075 * <p> +076 * Consider this method private, it is package protected for unit testing only. +077 * </p> +078 * +079 * @param name +080 * The name to be cleaned +081 * @return The cleaned name +082 */ +083 String cleanName(final String name) { +084 String upperName = name.toUpperCase(Locale.ENGLISH); +085 +086 final String[] charsToTrim = { "\\-", "[&]", "\\'", "\\.", "[\\,]" }; +087 for (final String str : charsToTrim) { +088 upperName = upperName.replaceAll(str, EMPTY); +089 } +090 +091 upperName = removeAccents(upperName); +092 upperName = upperName.replaceAll("\\s+", EMPTY); +093 +094 return upperName; +095 } +096 +097 /** +098 * Encodes an Object using the Match Rating Approach algorithm. 
Method is here to satisfy the requirements of the +099 * Encoder interface Throws an EncoderException if input object is not of type java.lang.String. +100 * +101 * @param pObject +102 * Object to encode +103 * @return An object (or type java.lang.String) containing the Match Rating Approach code which corresponds to the +104 * String supplied. +105 * @throws EncoderException +106 * if the parameter supplied is not of type java.lang.String +107 */ +108 @Override +109 public final Object encode(final Object pObject) throws EncoderException { +110 if (!(pObject instanceof String)) { +111 throw new EncoderException( +112 "Parameter supplied to Match Rating Approach encoder is not of type java.lang.String"); +113 } +114 return encode((String) pObject); +115 } +116 +117 /** +118 * Encodes a String using the Match Rating Approach (MRA) algorithm. +119 * +120 * @param name +121 * String object to encode +122 * @return The MRA code corresponding to the String supplied +123 */ +124 @Override +125 public final String encode(String name) { +126 // Bulletproof for trivial input - NINO +127 if (name == null || EMPTY.equalsIgnoreCase(name) || SPACE.equalsIgnoreCase(name) || name.length() == 1) { +128 return EMPTY; +129 } +130 +131 // Preprocessing +132 name = cleanName(name); +133 +134 // BEGIN: Actual encoding part of the algorithm... +135 // 1. Delete all vowels unless the vowel begins the word +136 name = removeVowels(name); +137 +138 // 2. Remove second consonant from any double consonant +139 name = removeDoubleConsonants(name); +140 +141 // 3. Reduce codex to 6 letters by joining the first 3 and last 3 letters +142 name = getFirst3Last3(name); +143 +144 return name; +145 } +146 +147 /** +148 * Gets the first and last 3 letters of a name (if > 6 characters) Else just returns the name. +149 * +150 * <h2>API Usage</h2> +151 * <p> +152 * Consider this method private, it is package protected for unit testing only. 
+153 * </p> +154 * +155 * @param name +156 * The string to get the substrings from +157 * @return Annexed first and last 3 letters of input word. +158 */ +159 String getFirst3Last3(final String name) { +160 final int nameLength = name.length(); +161 +162 if (nameLength > SIX) { +163 final String firstThree = name.substring(0, THREE); +164 final String lastThree = name.substring(nameLength - THREE, nameLength); +165 return firstThree + lastThree; +166 } +167 return name; +168 } +169 +170 /** +171 * Obtains the min rating of the length sum of the 2 names. In essence the larger the sum length the smaller the +172 * min rating. Values strictly from documentation. +173 * +174 * <h2>API Usage</h2> +175 * <p> +176 * Consider this method private, it is package protected for unit testing only. +177 * </p> +178 * +179 * @param sumLength +180 * The length of 2 strings sent down +181 * @return The min rating value +182 */ +183 int getMinRating(final int sumLength) { +184 int minRating = 0; +185 +186 if (sumLength <= FOUR) { +187 minRating = FIVE; +188 } else if (sumLength <= SEVEN) { // aready know it is at least 5 +189 minRating = FOUR; +190 } else if (sumLength <= ELEVEN) { // aready know it is at least 8 +191 minRating = THREE; +192 } else if (sumLength == TWELVE) { +193 minRating = TWO; +194 } else { +195 minRating = ONE; // docs said little here. +196 } +197 +198 return minRating; +199 } +200 +201 /** +202 * Determines if two names are homophonous via Match Rating Approach (MRA) algorithm. It should be noted that the +203 * strings are cleaned in the same way as {@link #encode(String)}. +204 * +205 * @param name1 +206 * First of the 2 strings (names) to compare +207 * @param name2 +208 * Second of the 2 names to compare +209 * @return <code>true</code> if the encodings are identical <code>false</code> otherwise. 
+210 */ +211 public boolean isEncodeEquals(String name1, String name2) { +212 // Bulletproof for trivial input - NINO +213 if (name1 == null || EMPTY.equalsIgnoreCase(name1) || SPACE.equalsIgnoreCase(name1)) { +214 return false; +215 } else if (name2 == null || EMPTY.equalsIgnoreCase(name2) || SPACE.equalsIgnoreCase(name2)) { +216 return false; +217 } else if (name1.length() == 1 || name2.length() == 1) { +218 return false; +219 } else if (name1.equalsIgnoreCase(name2)) { +220 return true; +221 } +222 +223 // Preprocessing +224 name1 = cleanName(name1); +225 name2 = cleanName(name2); +226 +227 // Actual MRA Algorithm +228 +229 // 1. Remove vowels +230 name1 = removeVowels(name1); +231 name2 = removeVowels(name2); +232 +233 // 2. Remove double consonants +234 name1 = removeDoubleConsonants(name1); +235 name2 = removeDoubleConsonants(name2); +236 +237 // 3. Reduce down to 3 letters +238 name1 = getFirst3Last3(name1); +239 name2 = getFirst3Last3(name2); +240 +241 // 4. Check for length difference - if 3 or greater then no similarity +242 // comparison is done +243 if (Math.abs(name1.length() - name2.length()) >= THREE) { +244 return false; +245 } +246 +247 // 5. Obtain the minimum rating value by calculating the length sum of the +248 // encoded Strings and sending it down. +249 final int sumLength = Math.abs(name1.length() + name2.length()); +250 int minRating = 0; +251 minRating = getMinRating(sumLength); +252 +253 // 6. Process the encoded Strings from left to right and remove any +254 // identical characters found from both Strings respectively. +255 final int count = leftToRightThenRightToLeftProcessing(name1, name2); +256 +257 // 7. Each PNI item that has a similarity rating equal to or greater than +258 // the min is considered to be a good candidate match +259 return count >= minRating; +260 +261 } +262 +263 /** +264 * Determines if a letter is a vowel. 
+265 * +266 * <h2>API Usage</h2> +267 * <p> +268 * Consider this method private, it is package protected for unit testing only. +269 * </p> +270 * +271 * @param letter +272 * The letter under investiagtion +273 * @return True if a vowel, else false +274 */ +275 boolean isVowel(final String letter) { +276 return letter.equalsIgnoreCase("E") || letter.equalsIgnoreCase("A") || letter.equalsIgnoreCase("O") || +277 letter.equalsIgnoreCase("I") || letter.equalsIgnoreCase("U"); +278 } +279 +280 /** +281 * Processes the names from left to right (first) then right to left removing identical letters in same positions. +282 * Then subtracts the longer string that remains from 6 and returns this. +283 * +284 * <h2>API Usage</h2> +285 * <p> +286 * Consider this method private, it is package protected for unit testing only. +287 * </p> +288 * +289 * @param name1 +290 * name2 +291 * @return the length as above +292 */ +293 int leftToRightThenRightToLeftProcessing(final String name1, final String name2) { +294 final char[] name1Char = name1.toCharArray(); +295 final char[] name2Char = name2.toCharArray(); +296 +297 final int name1Size = name1.length() - 1; +298 final int name2Size = name2.length() - 1; +299 +300 String name1LtRStart = EMPTY; +301 String name1LtREnd = EMPTY; +302 +303 String name2RtLStart = EMPTY; +304 String name2RtLEnd = EMPTY; +305 +306 for (int i = 0; i < name1Char.length; i++) { +307 if (i > name2Size) { +308 break; +309 } +310 +311 name1LtRStart = name1.substring(i, i + 1); +312 name1LtREnd = name1.substring(name1Size - i, name1Size - i + 1); +313 +314 name2RtLStart = name2.substring(i, i + 1); +315 name2RtLEnd = name2.substring(name2Size - i, name2Size - i + 1); +316 +317 // Left to right... +318 if (name1LtRStart.equals(name2RtLStart)) { +319 name1Char[i] = ' '; +320 name2Char[i] = ' '; +321 } +322 +323 // Right to left... 
+324 if (name1LtREnd.equals(name2RtLEnd)) { +325 name1Char[name1Size - i] = ' '; +326 name2Char[name2Size - i] = ' '; +327 } +328 } +329 +330 // Char arrays -> string & remove extraneous space +331 final String strA = new String(name1Char).replaceAll("\\s+", EMPTY); +332 final String strB = new String(name2Char).replaceAll("\\s+", EMPTY); +333 +334 // Final bit - subtract longest string from 6 and return this int value +335 if (strA.length() > strB.length()) { +336 return Math.abs(SIX - strA.length()); +337 } +338 return Math.abs(SIX - strB.length()); +339 } +340 +341 /** +342 * Removes accented letters and replaces with non-accented ascii equivalent Case is preserved. +343 * http://www.codecodex.com/wiki/Remove_accent_from_letters_%28ex_.%C3%A9_to_e%29 +344 * +345 * @param accentedWord +346 * The word that may have accents in it. +347 * @return De-accented word +348 */ +349 String removeAccents(final String accentedWord) { +350 if (accentedWord == null) { +351 return null; +352 } +353 +354 final StringBuilder sb = new StringBuilder(); +355 final int n = accentedWord.length(); +356 +357 for (int i = 0; i < n; i++) { +358 final char c = accentedWord.charAt(i); +359 final int pos = UNICODE.indexOf(c); +360 if (pos > -1) { +361 sb.append(PLAIN_ASCII.charAt(pos)); +362 } else { +363 sb.append(c); +364 } +365 } +366 +367 return sb.toString(); +368 } +369 +370 /** +371 * Replaces any double consonant pair with the single letter equivalent. +372 * +373 * <h2>API Usage</h2> +374 * <p> +375 * Consider this method private, it is package protected for unit testing only. 
+376 * </p> +377 * +378 * @param name +379 * String to have double consonants removed +380 * @return Single consonant word +381 */ +382 String removeDoubleConsonants(final String name) { +383 String replacedName = name.toUpperCase(Locale.ENGLISH); +384 for (final String dc : DOUBLE_CONSONANT) { +385 if (replacedName.contains(dc)) { +386 final String singleLetter = dc.substring(0, 1); +387 replacedName = replacedName.replace(dc, singleLetter); +388 } +389 } +390 return replacedName; +391 } +392 +393 /** +394 * Deletes all vowels unless the vowel begins the word. +395 * +396 * <h2>API Usage</h2> +397 * <p> +398 * Consider this method private, it is package protected for unit testing only. +399 * </p> +400 * +401 * @param name +402 * The name to have vowels removed +403 * @return De-voweled word +404 */ +405 String removeVowels(String name) { +406 // Extract first letter +407 final String firstLetter = name.substring(0, 1); +408 +409 name = name.replaceAll("A", EMPTY); +410 name = name.replaceAll("E", EMPTY); +411 name = name.replaceAll("I", EMPTY); +412 name = name.replaceAll("O", EMPTY); +413 name = name.replaceAll("U", EMPTY); +414 +415 name = name.replaceAll("\\s{2,}\\b", SPACE); +416 +417 // return isVowel(firstLetter) ? (firstLetter + name) : name; +418 if (isVowel(firstLetter)) { +419 return firstLetter + name; +420 } +421 return name; +422 } +423} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes a string into a Metaphone value. +025 * <p> +026 * Initial Java implementation by <CITE>William B. Brogden. December, 1997</CITE>. +027 * Permission given by <CITE>wbrogden</CITE> for code to be used anywhere. +028 * <p> +029 * <CITE>Hanging on the Metaphone</CITE> by <CITE>Lawrence Philips</CITE> in <CITE>Computer Language of Dec. 1990, +030 * p 39.</CITE> +031 * <p> +032 * Note, that this does not match the algorithm that ships with PHP, or the algorithm found in the Perl implementations: +033 * </p> +034 * <ul> +035 * <li><a href="http://search.cpan.org/~mschwern/Text-Metaphone-1.96/Metaphone.pm">Text:Metaphone-1.96</a> +036 * (broken link 4/30/2013) </li> +037 * <li><a href="https://metacpan.org/source/MSCHWERN/Text-Metaphone-1.96//Metaphone.pm">Text:Metaphone-1.96</a> +038 * (link checked 4/30/2013) </li> +039 * </ul> +040 * <p> +041 * They have had undocumented changes from the originally published algorithm. 
+042 * For more information, see <a href="https://issues.apache.org/jira/browse/CODEC-57">CODEC-57</a>. +043 * <p> +044 * This class is conditionally thread-safe. +045 * The instance field {@link #maxCodeLen} is mutable {@link #setMaxCodeLen(int)} +046 * but is not volatile, and accesses are not synchronized. +047 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization +048 * is used to ensure safe publication of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} +049 * after initial setup. +050 * +051 * @version $Id$ +052 */ +053public class Metaphone implements StringEncoder { +054 +055 /** +056 * Five values in the English language +057 */ +058 private static final String VOWELS = "AEIOU"; +059 +060 /** +061 * Variable used in Metaphone algorithm +062 */ +063 private static final String FRONTV = "EIY"; +064 +065 /** +066 * Variable used in Metaphone algorithm +067 */ +068 private static final String VARSON = "CSPTG"; +069 +070 /** +071 * The max code length for metaphone is 4 +072 */ +073 private int maxCodeLen = 4; +074 +075 /** +076 * Creates an instance of the Metaphone encoder +077 */ +078 public Metaphone() { +079 super(); +080 } +081 +082 /** +083 * Find the metaphone value of a String. This is similar to the +084 * soundex algorithm, but better at finding similar sounding words. +085 * All input is converted to upper case. +086 * Limitations: Input format is expected to be a single ASCII word +087 * with only characters in the A - Z range, no punctuation or numbers. 
+088 * +089 * @param txt String to find the metaphone code for +090 * @return A metaphone code corresponding to the String supplied +091 */ +092 public String metaphone(final String txt) { +093 boolean hard = false; +094 int txtLength; +095 if (txt == null || (txtLength = txt.length()) == 0) { +096 return ""; +097 } +098 // single character is itself +099 if (txtLength == 1) { +100 return txt.toUpperCase(java.util.Locale.ENGLISH); +101 } +102 +103 final char[] inwd = txt.toUpperCase(java.util.Locale.ENGLISH).toCharArray(); +104 +105 final StringBuilder local = new StringBuilder(40); // manipulate +106 final StringBuilder code = new StringBuilder(10); // output +107 // handle initial 2 characters exceptions +108 switch(inwd[0]) { +109 case 'K': +110 case 'G': +111 case 'P': /* looking for KN, etc*/ +112 if (inwd[1] == 'N') { +113 local.append(inwd, 1, inwd.length - 1); +114 } else { +115 local.append(inwd); +116 } +117 break; +118 case 'A': /* looking for AE */ +119 if (inwd[1] == 'E') { +120 local.append(inwd, 1, inwd.length - 1); +121 } else { +122 local.append(inwd); +123 } +124 break; +125 case 'W': /* looking for WR or WH */ +126 if (inwd[1] == 'R') { // WR -> R +127 local.append(inwd, 1, inwd.length - 1); +128 break; +129 } +130 if (inwd[1] == 'H') { +131 local.append(inwd, 1, inwd.length - 1); +132 local.setCharAt(0, 'W'); // WH -> W +133 } else { +134 local.append(inwd); +135 } +136 break; +137 case 'X': /* initial X becomes S */ +138 inwd[0] = 'S'; +139 local.append(inwd); +140 break; +141 default: +142 local.append(inwd); +143 } // now local has working string with initials fixed +144 +145 final int wdsz = local.length(); +146 int n = 0; +147 +148 while (code.length() < this.getMaxCodeLen() && +149 n < wdsz ) { // max code size of 4 works well +150 final char symb = local.charAt(n); +151 // remove duplicate letters except C +152 if (symb != 'C' && isPreviousChar( local, n, symb ) ) { +153 n++; +154 } else { // not dup +155 switch(symb) { +156 case 'A': 
+157 case 'E': +158 case 'I': +159 case 'O': +160 case 'U': +161 if (n == 0) { +162 code.append(symb); +163 } +164 break; // only use vowel if leading char +165 case 'B': +166 if ( isPreviousChar(local, n, 'M') && +167 isLastChar(wdsz, n) ) { // B is silent if word ends in MB +168 break; +169 } +170 code.append(symb); +171 break; +172 case 'C': // lots of C special cases +173 /* discard if SCI, SCE or SCY */ +174 if ( isPreviousChar(local, n, 'S') && +175 !isLastChar(wdsz, n) && +176 FRONTV.indexOf(local.charAt(n + 1)) >= 0 ) { +177 break; +178 } +179 if (regionMatch(local, n, "CIA")) { // "CIA" -> X +180 code.append('X'); +181 break; +182 } +183 if (!isLastChar(wdsz, n) && +184 FRONTV.indexOf(local.charAt(n + 1)) >= 0) { +185 code.append('S'); +186 break; // CI,CE,CY -> S +187 } +188 if (isPreviousChar(local, n, 'S') && +189 isNextChar(local, n, 'H') ) { // SCH->sk +190 code.append('K'); +191 break; +192 } +193 if (isNextChar(local, n, 'H')) { // detect CH +194 if (n == 0 && +195 wdsz >= 3 && +196 isVowel(local,2) ) { // CH consonant -> K consonant +197 code.append('K'); +198 } else { +199 code.append('X'); // CHvowel -> X +200 } +201 } else { +202 code.append('K'); +203 } +204 break; +205 case 'D': +206 if (!isLastChar(wdsz, n + 1) && +207 isNextChar(local, n, 'G') && +208 FRONTV.indexOf(local.charAt(n + 2)) >= 0) { // DGE DGI DGY -> J +209 code.append('J'); n += 2; +210 } else { +211 code.append('T'); +212 } +213 break; +214 case 'G': // GH silent at end or before consonant +215 if (isLastChar(wdsz, n + 1) && +216 isNextChar(local, n, 'H')) { +217 break; +218 } +219 if (!isLastChar(wdsz, n + 1) && +220 isNextChar(local,n,'H') && +221 !isVowel(local,n+2)) { +222 break; +223 } +224 if (n > 0 && +225 ( regionMatch(local, n, "GN") || +226 regionMatch(local, n, "GNED") ) ) { +227 break; // silent G +228 } +229 if (isPreviousChar(local, n, 'G')) { +230 // NOTE: Given that duplicated chars are removed, I don't see how this can ever be true +231 hard = true; +232 } else 
{ +233 hard = false; +234 } +235 if (!isLastChar(wdsz, n) && +236 FRONTV.indexOf(local.charAt(n + 1)) >= 0 && +237 !hard) { +238 code.append('J'); +239 } else { +240 code.append('K'); +241 } +242 break; +243 case 'H': +244 if (isLastChar(wdsz, n)) { +245 break; // terminal H +246 } +247 if (n > 0 && +248 VARSON.indexOf(local.charAt(n - 1)) >= 0) { +249 break; +250 } +251 if (isVowel(local,n+1)) { +252 code.append('H'); // Hvowel +253 } +254 break; +255 case 'F': +256 case 'J': +257 case 'L': +258 case 'M': +259 case 'N': +260 case 'R': +261 code.append(symb); +262 break; +263 case 'K': +264 if (n > 0) { // not initial +265 if (!isPreviousChar(local, n, 'C')) { +266 code.append(symb); +267 } +268 } else { +269 code.append(symb); // initial K +270 } +271 break; +272 case 'P': +273 if (isNextChar(local,n,'H')) { +274 // PH -> F +275 code.append('F'); +276 } else { +277 code.append(symb); +278 } +279 break; +280 case 'Q': +281 code.append('K'); +282 break; +283 case 'S': +284 if (regionMatch(local,n,"SH") || +285 regionMatch(local,n,"SIO") || +286 regionMatch(local,n,"SIA")) { +287 code.append('X'); +288 } else { +289 code.append('S'); +290 } +291 break; +292 case 'T': +293 if (regionMatch(local,n,"TIA") || +294 regionMatch(local,n,"TIO")) { +295 code.append('X'); +296 break; +297 } +298 if (regionMatch(local,n,"TCH")) { +299 // Silent if in "TCH" +300 break; +301 } +302 // substitute numeral 0 for TH (resembles theta after all) +303 if (regionMatch(local,n,"TH")) { +304 code.append('0'); +305 } else { +306 code.append('T'); +307 } +308 break; +309 case 'V': +310 code.append('F'); break; +311 case 'W': +312 case 'Y': // silent if not followed by vowel +313 if (!isLastChar(wdsz,n) && +314 isVowel(local,n+1)) { +315 code.append(symb); +316 } +317 break; +318 case 'X': +319 code.append('K'); +320 code.append('S'); +321 break; +322 case 'Z': +323 code.append('S'); +324 break; +325 default: +326 // do nothing +327 break; +328 } // end switch +329 n++; +330 } // end else 
from symb != 'C' +331 if (code.length() > this.getMaxCodeLen()) { +332 code.setLength(this.getMaxCodeLen()); +333 } +334 } +335 return code.toString(); +336 } +337 +338 private boolean isVowel(final StringBuilder string, final int index) { +339 return VOWELS.indexOf(string.charAt(index)) >= 0; +340 } +341 +342 private boolean isPreviousChar(final StringBuilder string, final int index, final char c) { +343 boolean matches = false; +344 if( index > 0 && +345 index < string.length() ) { +346 matches = string.charAt(index - 1) == c; +347 } +348 return matches; +349 } +350 +351 private boolean isNextChar(final StringBuilder string, final int index, final char c) { +352 boolean matches = false; +353 if( index >= 0 && +354 index < string.length() - 1 ) { +355 matches = string.charAt(index + 1) == c; +356 } +357 return matches; +358 } +359 +360 private boolean regionMatch(final StringBuilder string, final int index, final String test) { +361 boolean matches = false; +362 if( index >= 0 && +363 index + test.length() - 1 < string.length() ) { +364 final String substring = string.substring( index, index + test.length()); +365 matches = substring.equals( test ); +366 } +367 return matches; +368 } +369 +370 private boolean isLastChar(final int wdsz, final int n) { +371 return n + 1 == wdsz; +372 } +373 +374 +375 /** +376 * Encodes an Object using the metaphone algorithm. This method +377 * is provided in order to satisfy the requirements of the +378 * Encoder interface, and will throw an EncoderException if the +379 * supplied object is not of type java.lang.String. +380 * +381 * @param obj Object to encode +382 * @return An object (or type java.lang.String) containing the +383 * metaphone code which corresponds to the String supplied. 
+384 * @throws EncoderException if the parameter supplied is not +385 * of type java.lang.String +386 */ +387 @Override +388 public Object encode(final Object obj) throws EncoderException { +389 if (!(obj instanceof String)) { +390 throw new EncoderException("Parameter supplied to Metaphone encode is not of type java.lang.String"); +391 } +392 return metaphone((String) obj); +393 } +394 +395 /** +396 * Encodes a String using the Metaphone algorithm. +397 * +398 * @param str String object to encode +399 * @return The metaphone code corresponding to the String supplied +400 */ +401 @Override +402 public String encode(final String str) { +403 return metaphone(str); +404 } +405 +406 /** +407 * Tests is the metaphones of two strings are identical. +408 * +409 * @param str1 First of two strings to compare +410 * @param str2 Second of two strings to compare +411 * @return <code>true</code> if the metaphones of these strings are identical, +412 * <code>false</code> otherwise. +413 */ +414 public boolean isMetaphoneEqual(final String str1, final String str2) { +415 return metaphone(str1).equals(metaphone(str2)); +416 } +417 +418 /** +419 * Returns the maxCodeLen. +420 * @return int +421 */ +422 public int getMaxCodeLen() { return this.maxCodeLen; } +423 +424 /** +425 * Sets the maxCodeLen. +426 * @param maxCodeLen The maxCodeLen to set +427 */ +428 public void setMaxCodeLen(final int maxCodeLen) { this.maxCodeLen = maxCodeLen; } +429 +430} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language;

import java.util.regex.Pattern;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;

/**
 * Encodes a string into a NYSIIS value. NYSIIS is an encoding used to relate similar names, but can also be used as a
 * general purpose scheme to find words with similar phonemes.
 * <p>
 * NYSIIS features an accuracy increase of 2.7% over the traditional Soundex algorithm.
 * <p>
 * Algorithm description:
 * <pre>
 * 1. Transcode first characters of name
 *   1a. MAC -&gt;   MCC
 *   1b. KN  -&gt;   NN
 *   1c. K   -&gt;   C
 *   1d. PH  -&gt;   FF
 *   1e. PF  -&gt;   FF
 *   1f. SCH -&gt;   SSS
 * 2. Transcode last characters of name
 *   2a. EE, IE          -&gt;   Y
 *   2b. DT,RT,RD,NT,ND  -&gt;   D
 * 3. First character of key = first character of name
 * 4. Transcode remaining characters by following these rules, incrementing by one character each time
 *   4a. EV  -&gt;   AF  else A,E,I,O,U -&gt; A
 *   4b. Q   -&gt;   G
 *   4c. Z   -&gt;   S
 *   4d. M   -&gt;   N
 *   4e. KN  -&gt;   NN  else K -&gt; C
 *   4f. SCH -&gt;   SSS
 *   4g. PH  -&gt;   FF
 *   4h. H   -&gt;   If previous or next is nonvowel, previous
 *   4i. W   -&gt;   If previous is vowel, previous
 *   4j. Add current to key if current != last key character
 * 5. If last character is S, remove it
 * 6. If last characters are AY, replace with Y
 * 7. If last character is A, remove it
 * 8. Collapse all strings of repeated characters
 * 9. Add original first character of name as first character of key
 * </pre>
 * <p>
 * This class is immutable and thread-safe.
 *
 * @see <a href="http://en.wikipedia.org/wiki/NYSIIS">NYSIIS on Wikipedia</a>
 * @see <a href="http://www.dropby.com/NYSIIS.html">NYSIIS on dropby.com</a>
 * @see Soundex
 * @since 1.7
 * @version $Id$
 */
public class Nysiis implements StringEncoder {

    // Shared replacement sequences; they are only ever read, never modified.
    private static final char[] CHARS_A   = { 'A' };
    private static final char[] CHARS_AF  = { 'A', 'F' };
    private static final char[] CHARS_C   = { 'C' };
    private static final char[] CHARS_FF  = { 'F', 'F' };
    private static final char[] CHARS_G   = { 'G' };
    private static final char[] CHARS_N   = { 'N' };
    private static final char[] CHARS_NN  = { 'N', 'N' };
    private static final char[] CHARS_S   = { 'S' };
    private static final char[] CHARS_SSS = { 'S', 'S', 'S' };

    // Anchored patterns for the leading and trailing transcoding steps.
    private static final Pattern PAT_MAC    = Pattern.compile("^MAC");
    private static final Pattern PAT_KN     = Pattern.compile("^KN");
    private static final Pattern PAT_K      = Pattern.compile("^K");
    private static final Pattern PAT_PH_PF  = Pattern.compile("^(PH|PF)");
    private static final Pattern PAT_SCH    = Pattern.compile("^SCH");
    private static final Pattern PAT_EE_IE  = Pattern.compile("(EE|IE)$");
    private static final Pattern PAT_DT_ETC = Pattern.compile("(DT|RT|RD|NT|ND)$");

    /** Placeholder used for the look-ahead characters once the end of the name is reached. */
    private static final char SPACE = ' ';

    /** Maximum key length produced in strict mode. */
    private static final int TRUE_LENGTH = 6;

    /**
     * Tests if the given character is one of the upper-case vowels A, E, I, O or U.
     *
     * @param c
     *            the character to test
     * @return <code>true</code> if the character is a vowel, <code>false</code> otherwise
     */
    private static boolean isVowel(final char c) {
        switch (c) {
        case 'A':
        case 'E':
        case 'I':
        case 'O':
        case 'U':
            return true;
        default:
            return false;
        }
    }

    /**
     * Transcodes the character at the current position. The method looks at a sliding window of 4 characters:
     * [i-1, i, i+1, i+2].
     * <p>
     * Several results are shared constant arrays, so callers must treat the returned array as read-only.
     *
     * @param prev
     *            the previous character
     * @param curr
     *            the current character
     * @param next
     *            the next character
     * @param aNext
     *            the after next character
     * @return a transcoded array of characters, starting from the current position
     */
    private static char[] transcodeRemaining(final char prev, final char curr, final char next, final char aNext) {
        switch (curr) {
        case 'E':
            // EV -> AF, any other E is a plain vowel -> A
            return next == 'V' ? CHARS_AF : CHARS_A;
        case 'A':
        case 'I':
        case 'O':
        case 'U':
            // remaining vowels -> A
            return CHARS_A;
        case 'Q':
            return CHARS_G;
        case 'Z':
            return CHARS_S;
        case 'M':
            return CHARS_N;
        case 'K':
            // KN -> NN else K -> C
            return next == 'N' ? CHARS_NN : CHARS_C;
        case 'S':
            // SCH -> SSS
            if (next == 'C' && aNext == 'H') {
                return CHARS_SSS;
            }
            break;
        case 'P':
            // PH -> FF
            if (next == 'H') {
                return CHARS_FF;
            }
            break;
        case 'H':
            // H -> previous, if previous or next is a non vowel
            if (!isVowel(prev) || !isVowel(next)) {
                return new char[] { prev };
            }
            break;
        case 'W':
            // W -> previous, if previous is a vowel
            if (isVowel(prev)) {
                return new char[] { prev };
            }
            break;
        default:
            break;
        }
        // anything not covered above is kept unchanged
        return new char[] { curr };
    }

    /**
     * Applies the final clean-up rules to the end of the key, in place: a trailing S is removed, a trailing AY becomes
     * Y, and a trailing A is removed. Keys of length 0 or 1 are left untouched.
     *
     * @param key
     *            the key built so far
     */
    private static void trimKeyTail(final StringBuilder key) {
        if (key.length() <= 1) {
            return;
        }

        char tail = key.charAt(key.length() - 1);

        // If last character is S, remove it and look at the new last character.
        if (tail == 'S') {
            key.setLength(key.length() - 1);
            tail = key.charAt(key.length() - 1);
        }

        // If last characters are AY, replace with Y (i.e. drop the A).
        if (key.length() > 2 && tail == 'Y' && key.charAt(key.length() - 2) == 'A') {
            key.deleteCharAt(key.length() - 2);
        }

        // If last character is A, remove it.
        if (tail == 'A') {
            key.setLength(key.length() - 1);
        }
    }

    /** Indicates the strict mode. */
    private final boolean strict;

    /**
     * Creates an instance of the {@link Nysiis} encoder with strict mode (original form),
     * i.e. encoded strings have a maximum length of 6.
     */
    public Nysiis() {
        this(true);
    }

    /**
     * Create an instance of the {@link Nysiis} encoder with the specified strict mode:
     *
     * <ul>
     *  <li><code>true</code>: encoded strings have a maximum length of 6</li>
     *  <li><code>false</code>: encoded strings may have arbitrary length</li>
     * </ul>
     *
     * @param strict
     *            the strict mode
     */
    public Nysiis(final boolean strict) {
        this.strict = strict;
    }

    /**
     * Encodes an Object using the NYSIIS algorithm. This method is provided in order to satisfy the requirements of the
     * Encoder interface, and will throw an {@link EncoderException} if the supplied object is not of type
     * {@link String}.
     *
     * @param obj
     *            Object to encode
     * @return An object (or a {@link String}) containing the NYSIIS code which corresponds to the given String.
     * @throws EncoderException
     *            if the parameter supplied is not of a {@link String}
     * @throws IllegalArgumentException
     *            if a character is not mapped
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj instanceof String) {
            return this.nysiis((String) obj);
        }
        throw new EncoderException("Parameter supplied to Nysiis encode is not of type java.lang.String");
    }

    /**
     * Encodes a String using the NYSIIS algorithm.
     *
     * @param str
     *            A String object to encode
     * @return A Nysiis code corresponding to the String supplied
     * @throws IllegalArgumentException
     *            if a character is not mapped
     */
    @Override
    public String encode(final String str) {
        return this.nysiis(str);
    }

    /**
     * Indicates the strict mode for this {@link Nysiis} encoder.
     *
     * @return <code>true</code> if the encoder is configured for strict mode, <code>false</code> otherwise
     */
    public boolean isStrict() {
        return this.strict;
    }

    /**
     * Retrieves the NYSIIS code for a given String object.
     *
     * @param str
     *            String to encode using the NYSIIS algorithm
     * @return A NYSIIS code for the String supplied; <code>null</code> if the input is <code>null</code>, and the
     *         cleaned (empty) string if nothing is left after cleaning
     */
    public String nysiis(String str) {
        if (str == null) {
            return null;
        }

        // Use the same clean rules as Soundex
        String name = SoundexUtils.clean(str);
        if (name.isEmpty()) {
            return name;
        }

        // Translate first characters of name:
        // MAC -> MCC, KN -> NN, K -> C, PH | PF -> FF, SCH -> SSS
        name = PAT_MAC.matcher(name).replaceFirst("MCC");
        name = PAT_KN.matcher(name).replaceFirst("NN");
        name = PAT_K.matcher(name).replaceFirst("C");
        name = PAT_PH_PF.matcher(name).replaceFirst("FF");
        name = PAT_SCH.matcher(name).replaceFirst("SSS");

        // Translate last characters of name:
        // EE -> Y, IE -> Y, DT | RT | RD | NT | ND -> D
        name = PAT_EE_IE.matcher(name).replaceFirst("Y");
        name = PAT_DT_ETC.matcher(name).replaceFirst("D");

        final char[] buffer = name.toCharArray();
        final int size = buffer.length;

        // First character of key = first character of name.
        final StringBuilder key = new StringBuilder(name.length());
        key.append(name.charAt(0));

        // Transcode remaining characters, incrementing by one character each time.
        // The replacement is written back into the buffer so later positions see the transcoded neighbours.
        for (int pos = 1; pos < size; pos++) {
            final char following = pos + 1 < size ? buffer[pos + 1] : SPACE;
            final char afterFollowing = pos + 2 < size ? buffer[pos + 2] : SPACE;
            final char[] replacement = transcodeRemaining(buffer[pos - 1], buffer[pos], following, afterFollowing);
            System.arraycopy(replacement, 0, buffer, pos, replacement.length);

            // only append the current char to the key if it is different from the last one
            if (buffer[pos] != buffer[pos - 1]) {
                key.append(buffer[pos]);
            }
        }

        trimKeyTail(key);

        final String code = key.toString();
        if (this.isStrict() && code.length() > TRUE_LENGTH) {
            return code.substring(0, TRUE_LENGTH);
        }
        return code;
    }

}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes a string into a Refined Soundex value. A refined soundex code is +025 * optimized for spell checking words. Soundex method originally developed by +026 * <CITE>Margaret Odell</CITE> and <CITE>Robert Russell</CITE>. +027 * +028 * <p>This class is immutable and thread-safe.</p> +029 * +030 * @version $Id$ +031 */ +032public class RefinedSoundex implements StringEncoder { +033 +034 /** +035 * Mapping: +036 * <pre> +037 * 0: A E I O U Y H W +038 * 1: B P +039 * 2: F V +040 * 3: C K S +041 * 4: G J +042 * 5: Q X Z +043 * 6: D T +044 * 7: L +045 * 8: M N +046 * 9: R +047 * </pre> +048 * @since 1.4 +049 */ +050 // ABCDEFGHIJKLMNOPQRSTUVWXYZ +051 public static final String US_ENGLISH_MAPPING_STRING = "01360240043788015936020505"; +052 +053 /** +054 * RefinedSoundex is *refined* for a number of reasons one being that the +055 * mappings have been altered. This implementation contains default +056 * mappings for US English. 
+057 */ +058 private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); +059 +060 /** +061 * Every letter of the alphabet is "mapped" to a numerical value. This char +062 * array holds the values to which each letter is mapped. This +063 * implementation contains a default map for US_ENGLISH +064 */ +065 private final char[] soundexMapping; +066 +067 /** +068 * This static variable contains an instance of the RefinedSoundex using +069 * the US_ENGLISH mapping. +070 */ +071 public static final RefinedSoundex US_ENGLISH = new RefinedSoundex(); +072 +073 /** +074 * Creates an instance of the RefinedSoundex object using the default US +075 * English mapping. +076 */ +077 public RefinedSoundex() { +078 this.soundexMapping = US_ENGLISH_MAPPING; +079 } +080 +081 /** +082 * Creates a refined soundex instance using a custom mapping. This +083 * constructor can be used to customize the mapping, and/or possibly +084 * provide an internationalized mapping for a non-Western character set. +085 * +086 * @param mapping +087 * Mapping array to use when finding the corresponding code for +088 * a given character +089 */ +090 public RefinedSoundex(final char[] mapping) { +091 this.soundexMapping = new char[mapping.length]; +092 System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length); +093 } +094 +095 /** +096 * Creates a refined Soundex instance using a custom mapping. This constructor can be used to customize the mapping, +097 * and/or possibly provide an internationalized mapping for a non-Western character set. +098 * +099 * @param mapping +100 * Mapping string to use when finding the corresponding code for a given character +101 * @since 1.4 +102 */ +103 public RefinedSoundex(final String mapping) { +104 this.soundexMapping = mapping.toCharArray(); +105 } +106 +107 /** +108 * Returns the number of characters in the two encoded Strings that are the +109 * same. 
This return value ranges from 0 to the length of the shortest +110 * encoded String: 0 indicates little or no similarity, and 4 out of 4 (for +111 * example) indicates strong similarity or identical values. For refined +112 * Soundex, the return value can be greater than 4. +113 * +114 * @param s1 +115 * A String that will be encoded and compared. +116 * @param s2 +117 * A String that will be encoded and compared. +118 * @return The number of characters in the two encoded Strings that are the +119 * same from 0 to to the length of the shortest encoded String. +120 * +121 * @see SoundexUtils#difference(StringEncoder,String,String) +122 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> +123 * MS T-SQL DIFFERENCE</a> +124 * +125 * @throws EncoderException +126 * if an error occurs encoding one of the strings +127 * @since 1.3 +128 */ +129 public int difference(final String s1, final String s2) throws EncoderException { +130 return SoundexUtils.difference(this, s1, s2); +131 } +132 +133 /** +134 * Encodes an Object using the refined soundex algorithm. This method is +135 * provided in order to satisfy the requirements of the Encoder interface, +136 * and will throw an EncoderException if the supplied object is not of type +137 * java.lang.String. +138 * +139 * @param obj +140 * Object to encode +141 * @return An object (or type java.lang.String) containing the refined +142 * soundex code which corresponds to the String supplied. +143 * @throws EncoderException +144 * if the parameter supplied is not of type java.lang.String +145 */ +146 @Override +147 public Object encode(final Object obj) throws EncoderException { +148 if (!(obj instanceof String)) { +149 throw new EncoderException("Parameter supplied to RefinedSoundex encode is not of type java.lang.String"); +150 } +151 return soundex((String) obj); +152 } +153 +154 /** +155 * Encodes a String using the refined soundex algorithm. 
+156 * +157 * @param str +158 * A String object to encode +159 * @return A Soundex code corresponding to the String supplied +160 */ +161 @Override +162 public String encode(final String str) { +163 return soundex(str); +164 } +165 +166 /** +167 * Returns the mapping code for a given character. The mapping codes are +168 * maintained in an internal char array named soundexMapping, and the +169 * default values of these mappings are US English. +170 * +171 * @param c +172 * char to get mapping for +173 * @return A character (really a numeral) to return for the given char +174 */ +175 char getMappingCode(final char c) { +176 if (!Character.isLetter(c)) { +177 return 0; +178 } +179 return this.soundexMapping[Character.toUpperCase(c) - 'A']; +180 } +181 +182 /** +183 * Retrieves the Refined Soundex code for a given String object. +184 * +185 * @param str +186 * String to encode using the Refined Soundex algorithm +187 * @return A soundex code for the String supplied +188 */ +189 public String soundex(String str) { +190 if (str == null) { +191 return null; +192 } +193 str = SoundexUtils.clean(str); +194 if (str.length() == 0) { +195 return str; +196 } +197 +198 final StringBuilder sBuf = new StringBuilder(); +199 sBuf.append(str.charAt(0)); +200 +201 char last, current; +202 last = '*'; +203 +204 for (int i = 0; i < str.length(); i++) { +205 +206 current = getMappingCode(str.charAt(i)); +207 if (current == last) { +208 continue; +209 } else if (current != 0) { +210 sBuf.append(current); +211 } +212 +213 last = current; +214 +215 } +216 +217 return sBuf.toString(); +218 } +219} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes a string into a Soundex value. Soundex is an encoding used to relate similar names, but can also be used as a +025 * general purpose scheme to find word with similar phonemes. +026 * +027 * This class is thread-safe. +028 * Although not strictly immutable, the {@link #maxLength} field is not actually used. +029 * +030 * @version $Id$ +031 */ +032public class Soundex implements StringEncoder { +033 +034 /** +035 * The marker character used to indicate a silent (ignored) character. +036 * These are ignored except when they appear as the first character. +037 * <p> +038 * Note: the {@link #US_ENGLISH_MAPPING_STRING} does not use this mechanism +039 * because changing it might break existing code. Mappings that don't contain +040 * a silent marker code are treated as though H and W are silent. +041 * <p> +042 * To override this, use the {@link #Soundex(String, boolean)} constructor. 
+043 * @since 1.11 +044 */ +045 public static final char SILENT_MARKER = '-'; +046 +047 /** +048 * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position +049 * means do not encode, but treat as a separator when it occurs between consonants with the same code. +050 * <p> +051 * (This constant is provided as both an implementation convenience and to allow Javadoc to pick +052 * up the value for the constant values page.) +053 * <p> +054 * <b>Note that letters H and W are treated specially.</b> +055 * They are ignored (after the first letter) and don't act as separators +056 * between consonants with the same code. +057 * @see #US_ENGLISH_MAPPING +058 */ +059 // ABCDEFGHIJKLMNOPQRSTUVWXYZ +060 public static final String US_ENGLISH_MAPPING_STRING = "01230120022455012623010202"; +061 +062 /** +063 * This is a default mapping of the 26 letters used in US English. A value of <code>0</code> for a letter position +064 * means do not encode. +065 * +066 * @see Soundex#Soundex(char[]) +067 */ +068 private static final char[] US_ENGLISH_MAPPING = US_ENGLISH_MAPPING_STRING.toCharArray(); +069 +070 /** +071 * An instance of Soundex using the US_ENGLISH_MAPPING mapping. +072 * This treats H and W as silent letters. +073 * Apart from when they appear as the first letter, they are ignored. +074 * They don't act as separators between duplicate codes. +075 * +076 * @see #US_ENGLISH_MAPPING +077 * @see #US_ENGLISH_MAPPING_STRING +078 */ +079 public static final Soundex US_ENGLISH = new Soundex(); +080 +081 /** +082 * An instance of Soundex using the Simplified Soundex mapping, as described here: +083 * http://west-penwith.org.uk/misc/soundex.htm +084 * <p> +085 * This treats H and W the same as vowels (AEIOUY). +086 * Such letters aren't encoded (after the first), but they do +087 * act as separators when dropping duplicate codes. 
+088 * The mapping is otherwise the same as for {@link #US_ENGLISH} +089 * <p> +090 * @since 1.11 +091 */ +092 public static final Soundex US_ENGLISH_SIMPLIFIED = new Soundex(US_ENGLISH_MAPPING_STRING, false); +093 +094 /** +095 * An instance of Soundex using the mapping as per the Genealogy site: +096 * http://www.genealogy.com/articles/research/00000060.html +097 * <p> +098 * This treats vowels (AEIOUY), H and W as silent letters. +099 * Such letters are ignored (after the first) and do not +100 * act as separators when dropping duplicate codes. +101 * <p> +102 * The codes for consonants are otherwise the same as for +103 * {@link #US_ENGLISH_MAPPING_STRING} and {@link #US_ENGLISH_SIMPLIFIED} +104 * +105 * @since 1.11 +106 */ +107 public static final Soundex US_ENGLISH_GENEALOGY = new Soundex("-123-12--22455-12623-1-2-2"); +108 // ABCDEFGHIJKLMNOPQRSTUVWXYZ +109 +110 /** +111 * The maximum length of a Soundex code - Soundex codes are only four characters by definition. +112 * +113 * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. +114 */ +115 @Deprecated +116 private int maxLength = 4; +117 +118 /** +119 * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each +120 * letter is mapped. This implementation contains a default map for US_ENGLISH +121 */ +122 private final char[] soundexMapping; +123 +124 /** +125 * Should H and W be treated specially? +126 * <p> +127 * In versions of the code prior to 1.11, +128 * the code always treated H and W as silent (ignored) letters. +129 * If this field is false, H and W are no longer special-cased. 
+130 */ +131 private final boolean specialCaseHW; +132 +133 /** +134 * Creates an instance using US_ENGLISH_MAPPING +135 * +136 * @see Soundex#Soundex(char[]) +137 * @see Soundex#US_ENGLISH_MAPPING +138 */ +139 public Soundex() { +140 this.soundexMapping = US_ENGLISH_MAPPING; +141 this.specialCaseHW = true; +142 } +143 +144 /** +145 * Creates a soundex instance using the given mapping. This constructor can be used to provide an internationalized +146 * mapping for a non-Western character set. +147 * +148 * Every letter of the alphabet is "mapped" to a numerical value. This char array holds the values to which each +149 * letter is mapped. This implementation contains a default map for US_ENGLISH +150 * <p> +151 * If the mapping contains an instance of {@link #SILENT_MARKER} then H and W are not given special treatment +152 * +153 * @param mapping +154 * Mapping array to use when finding the corresponding code for a given character +155 */ +156 public Soundex(final char[] mapping) { +157 this.soundexMapping = new char[mapping.length]; +158 System.arraycopy(mapping, 0, this.soundexMapping, 0, mapping.length); +159 this.specialCaseHW = !hasMarker(this.soundexMapping); +160 } +161 +162 private boolean hasMarker(final char[] mapping) { +163 for(final char ch : mapping) { +164 if (ch == SILENT_MARKER) { +165 return true; +166 } +167 } +168 return false; +169 } +170 +171 /** +172 * Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping, +173 * and/or possibly provide an internationalized mapping for a non-Western character set. 
+174 * <p> +175 * If the mapping contains an instance of {@link #SILENT_MARKER} then H and W are not given special treatment +176 * +177 * @param mapping +178 * Mapping string to use when finding the corresponding code for a given character +179 * @since 1.4 +180 */ +181 public Soundex(final String mapping) { +182 this.soundexMapping = mapping.toCharArray(); +183 this.specialCaseHW = !hasMarker(this.soundexMapping); +184 } +185 +186 /** +187 * Creates a refined soundex instance using a custom mapping. This constructor can be used to customize the mapping, +188 * and/or possibly provide an internationalized mapping for a non-Western character set. +189 * +190 * @param mapping +191 * Mapping string to use when finding the corresponding code for a given character +192 * @param specialCaseHW if true, then +193 * @since 1.11 +194 */ +195 public Soundex(final String mapping, final boolean specialCaseHW) { +196 this.soundexMapping = mapping.toCharArray(); +197 this.specialCaseHW = specialCaseHW; +198 } +199 +200 /** +201 * Encodes the Strings and returns the number of characters in the two encoded Strings that are the same. This +202 * return value ranges from 0 through 4: 0 indicates little or no similarity, and 4 indicates strong similarity or +203 * identical values. +204 * +205 * @param s1 +206 * A String that will be encoded and compared. +207 * @param s2 +208 * A String that will be encoded and compared. +209 * @return The number of characters in the two encoded Strings that are the same from 0 to 4. 
+210 * +211 * @see SoundexUtils#difference(StringEncoder,String,String) +212 * @see <a href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/tsqlref/ts_de-dz_8co5.asp"> MS +213 * T-SQL DIFFERENCE </a> +214 * +215 * @throws EncoderException +216 * if an error occurs encoding one of the strings +217 * @since 1.3 +218 */ +219 public int difference(final String s1, final String s2) throws EncoderException { +220 return SoundexUtils.difference(this, s1, s2); +221 } +222 +223 /** +224 * Encodes an Object using the soundex algorithm. This method is provided in order to satisfy the requirements of +225 * the Encoder interface, and will throw an EncoderException if the supplied object is not of type java.lang.String. +226 * +227 * @param obj +228 * Object to encode +229 * @return An object (or type java.lang.String) containing the soundex code which corresponds to the String +230 * supplied. +231 * @throws EncoderException +232 * if the parameter supplied is not of type java.lang.String +233 * @throws IllegalArgumentException +234 * if a character is not mapped +235 */ +236 @Override +237 public Object encode(final Object obj) throws EncoderException { +238 if (!(obj instanceof String)) { +239 throw new EncoderException("Parameter supplied to Soundex encode is not of type java.lang.String"); +240 } +241 return soundex((String) obj); +242 } +243 +244 /** +245 * Encodes a String using the soundex algorithm. +246 * +247 * @param str +248 * A String object to encode +249 * @return A Soundex code corresponding to the String supplied +250 * @throws IllegalArgumentException +251 * if a character is not mapped +252 */ +253 @Override +254 public String encode(final String str) { +255 return soundex(str); +256 } +257 +258 /** +259 * Returns the maxLength. Standard Soundex +260 * +261 * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. 
+262 * @return int +263 */ +264 @Deprecated +265 public int getMaxLength() { +266 return this.maxLength; +267 } +268 +269 /** +270 * Maps the given upper-case character to its Soundex code. +271 * +272 * @param ch +273 * An upper-case character. +274 * @return A Soundex code. +275 * @throws IllegalArgumentException +276 * Thrown if <code>ch</code> is not mapped. +277 */ +278 private char map(final char ch) { +279 final int index = ch - 'A'; +280 if (index < 0 || index >= this.soundexMapping.length) { +281 throw new IllegalArgumentException("The character is not mapped: " + ch + " (index=" + index + ")"); +282 } +283 return this.soundexMapping[index]; +284 } +285 +286 /** +287 * Sets the maxLength. +288 * +289 * @deprecated This feature is not needed since the encoding size must be constant. Will be removed in 2.0. +290 * @param maxLength +291 * The maxLength to set +292 */ +293 @Deprecated +294 public void setMaxLength(final int maxLength) { +295 this.maxLength = maxLength; +296 } +297 +298 /** +299 * Retrieves the Soundex code for a given String object. 
+300 * +301 * @param str +302 * String to encode using the Soundex algorithm +303 * @return A soundex code for the String supplied +304 * @throws IllegalArgumentException +305 * if a character is not mapped +306 */ +307 public String soundex(String str) { +308 if (str == null) { +309 return null; +310 } +311 str = SoundexUtils.clean(str); +312 if (str.length() == 0) { +313 return str; +314 } +315 final char out[] = {'0', '0', '0', '0'}; +316 int count = 0; +317 final char first = str.charAt(0); +318 out[count++] = first; +319 char lastDigit = map(first); // previous digit +320 for(int i = 1; i < str.length() && count < out.length ; i++) { +321 final char ch = str.charAt(i); +322 if ((this.specialCaseHW) && (ch == 'H' || ch == 'W')) { // these are ignored completely +323 continue; +324 } +325 final char digit = map(ch); +326 if (digit == SILENT_MARKER) { +327 continue; +328 } +329 if (digit != '0' && digit != lastDigit) { // don't store vowels or repeats +330 out[count++] = digit; +331 } +332 lastDigit = digit; +333 } +334 return new String(out); +335 } +336 +337} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import org.apache.commons.codec.EncoderException; +021import org.apache.commons.codec.StringEncoder; +022 +023/** +024 * Encodes strings into their Beider-Morse phonetic encoding. +025 * <p> +026 * Beider-Morse phonetic encodings are optimised for family names. However, they may be useful for a wide range of +027 * words. +028 * <p> +029 * This encoder is intentionally mutable to allow dynamic configuration through bean properties. As such, it is mutable, +030 * and may not be thread-safe. If you require a guaranteed thread-safe encoding then use {@link PhoneticEngine} +031 * directly. +032 * <p> +033 * <b>Encoding overview</b> +034 * <p> +035 * Beider-Morse phonetic encodings is a multi-step process. Firstly, a table of rules is consulted to guess what +036 * language the word comes from. For example, if it ends in "<code>ault</code>" then it infers that the word is French. +037 * Next, the word is translated into a phonetic representation using a language-specific phonetics table. 
Some runs of +038 * letters can be pronounced in multiple ways, and a single run of letters may be potentially broken up into phonemes at +039 * different places, so this stage results in a set of possible language-specific phonetic representations. Lastly, this +040 * language-specific phonetic representation is processed by a table of rules that re-writes it phonetically taking into +041 * account systematic pronunciation differences between languages, to move it towards a pan-indo-european phonetic +042 * representation. Again, sometimes there are multiple ways this could be done and sometimes things that can be +043 * pronounced in several ways in the source language have only one way to represent them in this average phonetic +044 * language, so the result is again a set of phonetic spellings. +045 * <p> +046 * Some names are treated as having multiple parts. This can be due to two things. Firstly, they may be hyphenated. In +047 * this case, each individual hyphenated word is encoded, and then these are combined end-to-end for the final encoding. +048 * Secondly, some names have standard prefixes, for example, "<code>Mac/Mc</code>" in Scottish (English) names. As +049 * sometimes it is ambiguous whether the prefix is intended or is an accident of the spelling, the word is encoded once +050 * with the prefix and once without it. The resulting encoding contains one and then the other result. +051 * <p> +052 * <b>Encoding format</b> +053 * <p> +054 * Individual phonetic spellings of an input word are represented in upper- and lower-case roman characters. Where there +055 * are multiple possible phonetic representations, these are joined with a pipe (<code>|</code>) character. If multiple +056 * hyphenated words where found, or if the word may contain a name prefix, each encoded word is placed in elipses and +057 * these blocks are then joined with hyphens. For example, "<code>d'ortley</code>" has a possible prefix. 
The form +058 * without prefix encodes to "<code>ortlaj|ortlej</code>", while the form with prefix encodes to " +059 * <code>dortlaj|dortlej</code>". Thus, the full, combined encoding is "<code>(ortlaj|ortlej)-(dortlaj|dortlej)</code>". +060 * <p> +061 * The encoded forms are often quite a bit longer than the input strings. This is because a single input may have many +062 * potential phonetic interpretations. For example, "<code>Renault</code>" encodes to " +063 * <code>rYnDlt|rYnalt|rYnult|rinDlt|rinalt|rinult</code>". The <code>APPROX</code> rules will tend to produce larger +064 * encodings as they consider a wider range of possible, approximate phonetic interpretations of the original word. +065 * Down-stream applications may wish to further process the encoding for indexing or lookup purposes, for example, by +066 * splitting on pipe (<code>|</code>) and indexing under each of these alternatives. +067 * <p> +068 * <b>Note</b>: this version of the Beider-Morse encoding is equivalent with v3.4 of the reference implementation. +069 * </p> +070 * @see <a href="http://stevemorse.org/phonetics/bmpm.htm">Beider-Morse Phonetic Matching</a> +071 * @see <a href="http://stevemorse.org/phoneticinfo.htm">Reference implementation</a> +072 * +073 * <p> +074 * This class is Not ThreadSafe +075 * </p> +076 * @since 1.6 +077 * @version $Id$ +078 */ +079public class BeiderMorseEncoder implements StringEncoder { +080 // Implementation note: This class is a spring-friendly facade to PhoneticEngine. It allows read/write configuration +081 // of an immutable PhoneticEngine instance that will be delegated to for the actual encoding. 
+082 +083 // a cached object +084 private PhoneticEngine engine = new PhoneticEngine(NameType.GENERIC, RuleType.APPROX, true); +085 +086 @Override +087 public Object encode(final Object source) throws EncoderException { +088 if (!(source instanceof String)) { +089 throw new EncoderException("BeiderMorseEncoder encode parameter is not of type String"); +090 } +091 return encode((String) source); +092 } +093 +094 @Override +095 public String encode(final String source) throws EncoderException { +096 if (source == null) { +097 return null; +098 } +099 return this.engine.encode(source); +100 } +101 +102 /** +103 * Gets the name type currently in operation. +104 * +105 * @return the NameType currently being used +106 */ +107 public NameType getNameType() { +108 return this.engine.getNameType(); +109 } +110 +111 /** +112 * Gets the rule type currently in operation. +113 * +114 * @return the RuleType currently being used +115 */ +116 public RuleType getRuleType() { +117 return this.engine.getRuleType(); +118 } +119 +120 /** +121 * Discovers if multiple possible encodings are concatenated. +122 * +123 * @return true if multiple encodings are concatenated, false if just the first one is returned +124 */ +125 public boolean isConcat() { +126 return this.engine.isConcat(); +127 } +128 +129 /** +130 * Sets how multiple possible phonetic encodings are combined. +131 * +132 * @param concat +133 * true if multiple encodings are to be combined with a '|', false if just the first one is +134 * to be considered +135 */ +136 public void setConcat(final boolean concat) { +137 this.engine = new PhoneticEngine(this.engine.getNameType(), +138 this.engine.getRuleType(), +139 concat, +140 this.engine.getMaxPhonemes()); +141 } +142 +143 /** +144 * Sets the type of name. Use {@link NameType#GENERIC} unless you specifically want phonetic encodings +145 * optimized for Ashkenazi or Sephardic Jewish family names. 
+146 * +147 * @param nameType +148 * the NameType in use +149 */ +150 public void setNameType(final NameType nameType) { +151 this.engine = new PhoneticEngine(nameType, +152 this.engine.getRuleType(), +153 this.engine.isConcat(), +154 this.engine.getMaxPhonemes()); +155 } +156 +157 /** +158 * Sets the rule type to apply. This will widen or narrow the range of phonetic encodings considered. +159 * +160 * @param ruleType +161 * {@link RuleType#APPROX} or {@link RuleType#EXACT} for approximate or exact phonetic matches +162 */ +163 public void setRuleType(final RuleType ruleType) { +164 this.engine = new PhoneticEngine(this.engine.getNameType(), +165 ruleType, +166 this.engine.isConcat(), +167 this.engine.getMaxPhonemes()); +168 } +169 +170 /** +171 * Sets the number of maximum of phonemes that shall be considered by the engine. +172 * +173 * @param maxPhonemes +174 * the maximum number of phonemes returned by the engine +175 * @since 1.7 +176 */ +177 public void setMaxPhonemes(final int maxPhonemes) { +178 this.engine = new PhoneticEngine(this.engine.getNameType(), +179 this.engine.getRuleType(), +180 this.engine.isConcat(), +181 maxPhonemes); +182 } +183 +184} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language.bm;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Language guessing utility.
 * <p>
 * This class encapsulates rules used to guess the possible languages that a word originates from. This is
 * done by reference to a whole series of rules distributed in resource files.
 * <p>
 * Instances of this class are typically managed through the static factory method instance().
 * Unless you are developing your own language guessing rules, you will not need to interact with this class directly.
 * <p>
 * This class is intended to be immutable and thread-safe.
 * <p>
 * <b>Lang resources</b>
 * <p>
 * Language guessing rules are typically loaded from resource files. These are UTF-8 encoded text files.
 * They are systematically named following the pattern:
 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_lang.txt</blockquote>
 * The format of these resources is the following:
 * <ul>
 * <li><b>Rules:</b> whitespace separated strings.
 * There should be 3 columns to each row, and these will be interpreted as:
 * <ol>
 * <li>pattern: a regular expression.</li>
 * <li>languages: a '+'-separated list of languages.</li>
 * <li>acceptOnMatch: 'true' or 'false' indicating if a match rules in or rules out the language.</li>
 * </ol>
 * </li>
 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
 * discarded as a comment.</li>
 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
 * This will skip all content until a line ending in '*' and '/' is found.</li>
 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 * </ul>
 * <p>
 * Port of lang.php
 *
 * @since 1.6
 * @version $Id$
 */
public class Lang {
    // Implementation note: This class is divided into two sections. The first part is a static factory interface that
    // exposes the LANGUAGE_RULES_RN resource as a Lang instance. The second part is the Lang instance methods that
    // encapsulate a particular language-guessing rule table and the language guessing itself.
    //
    // It may make sense in the future to expose the private constructor to allow power users to build custom language-
    // guessing rules, perhaps by marking it protected and allowing sub-classing. However, the vast majority of users
    // should be strongly encouraged to use the static factory <code>instance</code> method to get their Lang instances.

    /**
     * A single language-guessing rule: a regular expression plus the languages it rules in or out.
     */
    private static final class LangRule {
        /** True if a match restricts the guess to {@link #languages}; false if a match excludes them. */
        private final boolean acceptOnMatch;
        private final Set<String> languages;
        private final Pattern pattern;

        private LangRule(final Pattern pattern, final Set<String> languages, final boolean acceptOnMatch) {
            this.pattern = pattern;
            this.languages = languages;
            this.acceptOnMatch = acceptOnMatch;
        }

        /**
         * Tests whether the rule's pattern occurs anywhere in the text (a {@code find}, not a full match).
         *
         * @param txt
         *            the text to test
         * @return true if the pattern is found in the text
         */
        public boolean matches(final String txt) {
            return this.pattern.matcher(txt).find();
        }
    }

    private static final Map<NameType, Lang> Langs = new EnumMap<>(NameType.class);

    private static final String LANGUAGE_RULES_RN = "org/apache/commons/codec/language/bm/%s_lang.txt";

    static {
        // Eagerly load one rule table per name type.
        for (final NameType s : NameType.values()) {
            Langs.put(s, loadFromResource(String.format(LANGUAGE_RULES_RN, s.getName()), Languages.getInstance(s)));
        }
    }

    /**
     * Gets a Lang instance for one of the supported NameTypes.
     *
     * @param nameType
     *            the NameType to look up
     * @return a Lang encapsulating the language guessing rules for that name type
     */
    public static Lang instance(final NameType nameType) {
        return Langs.get(nameType);
    }

    /**
     * Loads language rules from a resource.
     * <p>
     * In normal use, you will obtain instances of Lang through the {@link #instance(NameType)} method.
     * You will only need to call this yourself if you are developing custom language mapping rules.
     *
     * @param languageRulesResourceName
     *            the fully-qualified resource name to load
     * @param languages
     *            the languages that these rules will support
     * @return a Lang encapsulating the loaded language-guessing rules.
     * @throws IllegalStateException
     *             if the resource cannot be found on the class path
     * @throws IllegalArgumentException
     *             if a rule line does not consist of exactly three whitespace-separated columns
     */
    public static Lang loadFromResource(final String languageRulesResourceName, final Languages languages) {
        final List<LangRule> rules = new ArrayList<>();
        final InputStream lRulesIS = Lang.class.getClassLoader().getResourceAsStream(languageRulesResourceName);

        if (lRulesIS == null) {
            // Report the resource that was actually requested, not the name template.
            throw new IllegalStateException("Unable to resolve required resource: " + languageRulesResourceName);
        }

        // Closing the scanner also closes the underlying resource stream.
        try (final Scanner scanner = new Scanner(lRulesIS, ResourceConstants.ENCODING)) {
            boolean inExtendedComment = false;
            while (scanner.hasNextLine()) {
                final String rawLine = scanner.nextLine();

                if (inExtendedComment) {
                    // Inside a multi-line comment: discard the line, leaving comment mode on the closing marker.
                    if (rawLine.endsWith(ResourceConstants.EXT_CMT_END)) {
                        inExtendedComment = false;
                    }
                    continue;
                }

                if (rawLine.startsWith(ResourceConstants.EXT_CMT_START)) {
                    inExtendedComment = true;
                    continue;
                }

                // Discard end-of-line comments, then surrounding whitespace.
                final int cmtI = rawLine.indexOf(ResourceConstants.CMT);
                final String line = (cmtI >= 0 ? rawLine.substring(0, cmtI) : rawLine).trim();

                if (line.isEmpty()) {
                    continue; // empty lines can be safely skipped
                }

                // Columns: pattern, '+'-separated languages, acceptOnMatch.
                final String[] parts = line.split("\\s+");

                if (parts.length != 3) {
                    throw new IllegalArgumentException("Malformed line '" + rawLine +
                            "' in language resource '" + languageRulesResourceName + "'");
                }

                final Pattern pattern = Pattern.compile(parts[0]);
                final String[] langs = parts[1].split("\\+");
                final boolean accept = parts[2].equals("true");

                rules.add(new LangRule(pattern, new HashSet<>(Arrays.asList(langs)), accept));
            }
        }
        return new Lang(rules, languages);
    }

    private final Languages languages;
    private final List<LangRule> rules;

    private Lang(final List<LangRule> rules, final Languages languages) {
        this.rules = Collections.unmodifiableList(rules);
        this.languages = languages;
    }

    /**
     * Guesses the language of a word.
     *
     * @param text
     *            the word
     * @return the language that the word originates from or {@link Languages#ANY} if there was no unique match
     */
    public String guessLanguage(final String text) {
        final Languages.LanguageSet ls = guessLanguages(text);
        return ls.isSingleton() ? ls.getAny() : Languages.ANY;
    }

    /**
     * Guesses the languages of a word.
     * <p>
     * Starts from every supported language and applies each matching rule in order: an accepting rule keeps only
     * the rule's languages, a rejecting rule removes them. If nothing is left, any language is considered possible.
     *
     * @param input
     *            the word
     * @return a Set of Strings of language names that are potential matches for the input word
     */
    public Languages.LanguageSet guessLanguages(final String input) {
        final String text = input.toLowerCase(Locale.ENGLISH);

        final Set<String> langs = new HashSet<>(this.languages.getLanguages());
        for (final LangRule rule : this.rules) {
            if (rule.matches(text)) {
                if (rule.acceptOnMatch) {
                    langs.retainAll(rule.languages);
                } else {
                    langs.removeAll(rule.languages);
                }
            }
        }

        final Languages.LanguageSet ls = Languages.LanguageSet.from(langs);
        return ls.equals(Languages.NO_LANGUAGES) ? Languages.ANY_LANGUAGE : ls;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language.bm;

import java.io.InputStream;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.Set;

/**
 * Language codes.
 * <p>
 * Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
 * systematically named following the pattern:
 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_languages.txt</blockquote>
 * <p>
 * The format of these resources is the following:
 * <ul>
 * <li><b>Language:</b> a single string containing no whitespace</li>
 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
 * discarded as a comment.</li>
 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
 * This will skip all content until a line ending in '*' and '/' is found.</li>
 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 * </ul>
 * <p>
 * Ported from language.php
 * <p>
 * This class is immutable and thread-safe.
 *
 * @since 1.6
 * @version $Id$
 */
public class Languages {
    // Implementation note: This class is divided into two sections. The first part is a static factory interface that
    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
    // languages, and a second part that provides instance methods for accessing this set for supported languages.

    /**
     * A set of languages.
     */
    public static abstract class LanguageSet {

        /**
         * Wraps a set of language names.
         *
         * @param langs
         *            the language names
         * @return {@link Languages#NO_LANGUAGES} if the set is empty, otherwise a {@link SomeLanguages} view of it
         */
        public static LanguageSet from(final Set<String> langs) {
            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
        }

        /**
         * Tests whether the given language belongs to this set.
         *
         * @param language
         *            the language name
         * @return true if the language is a member
         */
        public abstract boolean contains(String language);

        /**
         * Gets one member of this set.
         *
         * @return a language name from this set
         */
        public abstract String getAny();

        /**
         * Tests whether this set has no members.
         *
         * @return true if this set is empty
         */
        public abstract boolean isEmpty();

        /**
         * Tests whether this set has exactly one member.
         *
         * @return true if this set is a singleton
         */
        public abstract boolean isSingleton();

        /**
         * Narrows this set by another one.
         *
         * @param other
         *            the set to restrict to
         * @return the restricted set
         */
        public abstract LanguageSet restrictTo(LanguageSet other);

        /**
         * Combines this set with another one.
         *
         * @param other
         *            the set to merge with
         * @return the merged set
         */
        abstract LanguageSet merge(LanguageSet other);
    }

    /**
     * Some languages, explicitly enumerated.
     */
    public static final class SomeLanguages extends LanguageSet {
        private final Set<String> languages;

        private SomeLanguages(final Set<String> languages) {
            this.languages = Collections.unmodifiableSet(languages);
        }

        @Override
        public boolean contains(final String language) {
            return this.languages.contains(language);
        }

        @Override
        public String getAny() {
            return this.languages.iterator().next();
        }

        /**
         * Gets the enumerated language names.
         *
         * @return an unmodifiable view of the languages in this set
         */
        public Set<String> getLanguages() {
            return this.languages;
        }

        @Override
        public boolean isEmpty() {
            return this.languages.isEmpty();
        }

        @Override
        public boolean isSingleton() {
            return this.languages.size() == 1;
        }

        /**
         * Intersects this set with {@code other}.
         */
        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return other;
            } else if (other == ANY_LANGUAGE) {
                return this;
            } else {
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<>(languages);
                ls.retainAll(sl.languages);
                return from(ls);
            }
        }

        /**
         * Unions this set with {@code other}.
         */
        @Override
        public LanguageSet merge(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return this;
            } else if (other == ANY_LANGUAGE) {
                return other;
            } else {
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<>(languages);
                ls.addAll(sl.languages);
                return from(ls);
            }
        }

        @Override
        public String toString() {
            return "Languages(" + languages.toString() + ")";
        }

    }

    public static final String ANY = "any";

    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);

    static {
        // Eagerly load the language list of every name type.
        for (final NameType s : NameType.values()) {
            LANGUAGES.put(s, getInstance(langResourceName(s)));
        }
    }

    /**
     * Gets the languages supported for a name type.
     *
     * @param nameType
     *            the NameType to look up
     * @return the Languages loaded for that name type
     */
    public static Languages getInstance(final NameType nameType) {
        return LANGUAGES.get(nameType);
    }

    /**
     * Loads a list of languages from a resource, one language per line.
     * <p>
     * Multi-line comments, end-of-line comments and blank lines are skipped, as described in the class documentation.
     *
     * @param languagesResourceName
     *            the fully-qualified resource name to load
     * @return a Languages holding an unmodifiable set of the language names found
     * @throws IllegalArgumentException
     *             if the resource cannot be found on the class path
     */
    public static Languages getInstance(final String languagesResourceName) {
        // read languages list
        final Set<String> ls = new HashSet<>();
        final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName);

        if (langIS == null) {
            throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName);
        }

        // Closing the scanner also closes the underlying resource stream.
        try (final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING)) {
            boolean inExtendedComment = false;
            while (lsScanner.hasNextLine()) {
                final String rawLine = lsScanner.nextLine().trim();

                if (inExtendedComment) {
                    // Inside a multi-line comment: discard the line, leaving comment mode on the closing marker.
                    if (rawLine.endsWith(ResourceConstants.EXT_CMT_END)) {
                        inExtendedComment = false;
                    }
                    continue;
                }

                if (rawLine.startsWith(ResourceConstants.EXT_CMT_START)) {
                    inExtendedComment = true;
                    continue;
                }

                // Discard end-of-line comments so they never end up inside a language name.
                final int cmtI = rawLine.indexOf(ResourceConstants.CMT);
                final String line = cmtI >= 0 ? rawLine.substring(0, cmtI).trim() : rawLine;

                if (!line.isEmpty()) {
                    ls.add(line);
                }
            }
        }

        return new Languages(Collections.unmodifiableSet(ls));
    }

    private static String langResourceName(final NameType nameType) {
        return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
    }

    private final Set<String> languages;

    /**
     * No languages at all.
     */
    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return false;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
        }

        @Override
        public boolean isEmpty() {
            return true;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return this;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "NO_LANGUAGES";
        }
    };

    /**
     * Any/all languages.
     */
    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return true;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the any language set.");
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return other;
        }

        // NOTE(review): this returns the other set, whereas SomeLanguages.merge(ANY_LANGUAGE) returns
        // ANY_LANGUAGE, so merge is not symmetric. Left unchanged; confirm against callers before altering.
        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "ANY_LANGUAGE";
        }
    };

    private Languages(final Set<String> languages) {
        this.languages = languages;
    }

    /**
     * Gets the supported language names.
     *
     * @return the set of language names this instance was created with
     */
    public Set<String> getLanguages() {
        return this.languages;
    }
}
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language.bm;

import java.io.InputStream;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Scanner;
import java.util.Set;

/**
 * Language codes.
 * <p>
 * Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are
 * systematically named following the pattern:
 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()}}_languages.txt</blockquote>
 * <p>
 * The format of these resources is the following:
 * <ul>
 * <li><b>Language:</b> a single string containing no whitespace</li>
 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be
 * discarded as a comment.</li>
 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode.
 * This will skip all content until a line ending in '*' and '/' is found.</li>
 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 * </ul>
 * <p>
 * Ported from language.php
 * <p>
 * This class is immutable and thread-safe.
 *
 * @since 1.6
 * @version $Id$
 */
public class Languages {
    // Implementation note: This class is divided into two sections. The first part is a static factory interface that
    // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported
    // languages, and a second part that provides instance methods for accessing this set for supported languages.

    /**
     * A set of languages.
     */
    public static abstract class LanguageSet {

        /**
         * Wraps a set of language names.
         *
         * @param langs
         *            the language names
         * @return {@link Languages#NO_LANGUAGES} if the set is empty, otherwise a {@link SomeLanguages} view of it
         */
        public static LanguageSet from(final Set<String> langs) {
            return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs);
        }

        /**
         * Tests whether the given language belongs to this set.
         *
         * @param language
         *            the language name
         * @return true if the language is a member
         */
        public abstract boolean contains(String language);

        /**
         * Gets one member of this set.
         *
         * @return a language name from this set
         */
        public abstract String getAny();

        /**
         * Tests whether this set has no members.
         *
         * @return true if this set is empty
         */
        public abstract boolean isEmpty();

        /**
         * Tests whether this set has exactly one member.
         *
         * @return true if this set is a singleton
         */
        public abstract boolean isSingleton();

        /**
         * Narrows this set by another one.
         *
         * @param other
         *            the set to restrict to
         * @return the restricted set
         */
        public abstract LanguageSet restrictTo(LanguageSet other);

        /**
         * Combines this set with another one.
         *
         * @param other
         *            the set to merge with
         * @return the merged set
         */
        abstract LanguageSet merge(LanguageSet other);
    }

    /**
     * Some languages, explicitly enumerated.
     */
    public static final class SomeLanguages extends LanguageSet {
        private final Set<String> languages;

        private SomeLanguages(final Set<String> languages) {
            this.languages = Collections.unmodifiableSet(languages);
        }

        @Override
        public boolean contains(final String language) {
            return this.languages.contains(language);
        }

        @Override
        public String getAny() {
            return this.languages.iterator().next();
        }

        /**
         * Gets the enumerated language names.
         *
         * @return an unmodifiable view of the languages in this set
         */
        public Set<String> getLanguages() {
            return this.languages;
        }

        @Override
        public boolean isEmpty() {
            return this.languages.isEmpty();
        }

        @Override
        public boolean isSingleton() {
            return this.languages.size() == 1;
        }

        /**
         * Intersects this set with {@code other}.
         */
        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return other;
            } else if (other == ANY_LANGUAGE) {
                return this;
            } else {
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<>(languages);
                ls.retainAll(sl.languages);
                return from(ls);
            }
        }

        /**
         * Unions this set with {@code other}.
         */
        @Override
        public LanguageSet merge(final LanguageSet other) {
            if (other == NO_LANGUAGES) {
                return this;
            } else if (other == ANY_LANGUAGE) {
                return other;
            } else {
                final SomeLanguages sl = (SomeLanguages) other;
                final Set<String> ls = new HashSet<>(languages);
                ls.addAll(sl.languages);
                return from(ls);
            }
        }

        @Override
        public String toString() {
            return "Languages(" + languages.toString() + ")";
        }

    }

    public static final String ANY = "any";

    private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class);

    static {
        // Eagerly load the language list of every name type.
        for (final NameType s : NameType.values()) {
            LANGUAGES.put(s, getInstance(langResourceName(s)));
        }
    }

    /**
     * Gets the languages supported for a name type.
     *
     * @param nameType
     *            the NameType to look up
     * @return the Languages loaded for that name type
     */
    public static Languages getInstance(final NameType nameType) {
        return LANGUAGES.get(nameType);
    }

    /**
     * Loads a list of languages from a resource, one language per line.
     * <p>
     * Multi-line comments, end-of-line comments and blank lines are skipped, as described in the class documentation.
     *
     * @param languagesResourceName
     *            the fully-qualified resource name to load
     * @return a Languages holding an unmodifiable set of the language names found
     * @throws IllegalArgumentException
     *             if the resource cannot be found on the class path
     */
    public static Languages getInstance(final String languagesResourceName) {
        // read languages list
        final Set<String> ls = new HashSet<>();
        final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName);

        if (langIS == null) {
            throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName);
        }

        // Closing the scanner also closes the underlying resource stream.
        try (final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING)) {
            boolean inExtendedComment = false;
            while (lsScanner.hasNextLine()) {
                final String rawLine = lsScanner.nextLine().trim();

                if (inExtendedComment) {
                    // Inside a multi-line comment: discard the line, leaving comment mode on the closing marker.
                    if (rawLine.endsWith(ResourceConstants.EXT_CMT_END)) {
                        inExtendedComment = false;
                    }
                    continue;
                }

                if (rawLine.startsWith(ResourceConstants.EXT_CMT_START)) {
                    inExtendedComment = true;
                    continue;
                }

                // Discard end-of-line comments so they never end up inside a language name.
                final int cmtI = rawLine.indexOf(ResourceConstants.CMT);
                final String line = cmtI >= 0 ? rawLine.substring(0, cmtI).trim() : rawLine;

                if (!line.isEmpty()) {
                    ls.add(line);
                }
            }
        }

        return new Languages(Collections.unmodifiableSet(ls));
    }

    private static String langResourceName(final NameType nameType) {
        return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName());
    }

    private final Set<String> languages;

    /**
     * No languages at all.
     */
    public static final LanguageSet NO_LANGUAGES = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return false;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the empty language set.");
        }

        @Override
        public boolean isEmpty() {
            return true;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return this;
        }

        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "NO_LANGUAGES";
        }
    };

    /**
     * Any/all languages.
     */
    public static final LanguageSet ANY_LANGUAGE = new LanguageSet() {
        @Override
        public boolean contains(final String language) {
            return true;
        }

        @Override
        public String getAny() {
            throw new NoSuchElementException("Can't fetch any language from the any language set.");
        }

        @Override
        public boolean isEmpty() {
            return false;
        }

        @Override
        public boolean isSingleton() {
            return false;
        }

        @Override
        public LanguageSet restrictTo(final LanguageSet other) {
            return other;
        }

        // NOTE(review): this returns the other set, whereas SomeLanguages.merge(ANY_LANGUAGE) returns
        // ANY_LANGUAGE, so merge is not symmetric. Left unchanged; confirm against callers before altering.
        @Override
        public LanguageSet merge(final LanguageSet other) {
            return other;
        }

        @Override
        public String toString() {
            return "ANY_LANGUAGE";
        }
    };

    private Languages(final Set<String> languages) {
        this.languages = languages;
    }

    /**
     * Gets the supported language names.
     *
     * @return the set of language names this instance was created with
     */
    public Set<String> getLanguages() {
        return this.languages;
    }
}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.Collections; +022import java.util.EnumMap; +023import java.util.HashSet; +024import java.util.Map; +025import java.util.NoSuchElementException; +026import java.util.Scanner; +027import java.util.Set; +028 +029/** +030 * Language codes. +031 * <p> +032 * Language codes are typically loaded from resource files. These are UTF-8 encoded text files. They are +033 * systematically named following the pattern: +034 * <blockquote>org/apache/commons/codec/language/bm/${{@link NameType#getName()} languages.txt</blockquote> +035 * <p> +036 * The format of these resources is the following: +037 * <ul> +038 * <li><b>Language:</b> a single string containing no whitespace</li> +039 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be +040 * discarded as a comment.</li> +041 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
+042 * This will skip all content until a line ending in '*' and '/' is found.</li> +043 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +044 * </ul> +045 * <p> +046 * Ported from language.php +047 * <p> +048 * This class is immutable and thread-safe. +049 * +050 * @since 1.6 +051 * @version $Id$ +052 */ +053public class Languages { +054 // Implementation note: This class is divided into two sections. The first part is a static factory interface that +055 // exposes org/apache/commons/codec/language/bm/%s_languages.txt for %s in NameType.* as a list of supported +056 // languages, and a second part that provides instance methods for accessing this set for supported languages. +057 +058 /** +059 * A set of languages. +060 */ +061 public static abstract class LanguageSet { +062 +063 public static LanguageSet from(final Set<String> langs) { +064 return langs.isEmpty() ? NO_LANGUAGES : new SomeLanguages(langs); +065 } +066 +067 public abstract boolean contains(String language); +068 +069 public abstract String getAny(); +070 +071 public abstract boolean isEmpty(); +072 +073 public abstract boolean isSingleton(); +074 +075 public abstract LanguageSet restrictTo(LanguageSet other); +076 +077 abstract LanguageSet merge(LanguageSet other); +078 } +079 +080 /** +081 * Some languages, explicitly enumerated. 
+082 */ +083 public static final class SomeLanguages extends LanguageSet { +084 private final Set<String> languages; +085 +086 private SomeLanguages(final Set<String> languages) { +087 this.languages = Collections.unmodifiableSet(languages); +088 } +089 +090 @Override +091 public boolean contains(final String language) { +092 return this.languages.contains(language); +093 } +094 +095 @Override +096 public String getAny() { +097 return this.languages.iterator().next(); +098 } +099 +100 public Set<String> getLanguages() { +101 return this.languages; +102 } +103 +104 @Override +105 public boolean isEmpty() { +106 return this.languages.isEmpty(); +107 } +108 +109 @Override +110 public boolean isSingleton() { +111 return this.languages.size() == 1; +112 } +113 +114 @Override +115 public LanguageSet restrictTo(final LanguageSet other) { +116 if (other == NO_LANGUAGES) { +117 return other; +118 } else if (other == ANY_LANGUAGE) { +119 return this; +120 } else { +121 final SomeLanguages sl = (SomeLanguages) other; +122 final Set<String> ls = new HashSet<>(Math.min(languages.size(), sl.languages.size())); +123 for (final String lang : languages) { +124 if (sl.languages.contains(lang)) { +125 ls.add(lang); +126 } +127 } +128 return from(ls); +129 } +130 } +131 +132 @Override +133 public LanguageSet merge(final LanguageSet other) { +134 if (other == NO_LANGUAGES) { +135 return this; +136 } else if (other == ANY_LANGUAGE) { +137 return other; +138 } else { +139 final SomeLanguages sl = (SomeLanguages) other; +140 final Set<String> ls = new HashSet<>(languages); +141 for (final String lang : sl.languages) { +142 ls.add(lang); +143 } +144 return from(ls); +145 } +146 } +147 +148 @Override +149 public String toString() { +150 return "Languages(" + languages.toString() + ")"; +151 } +152 +153 } +154 +155 public static final String ANY = "any"; +156 +157 private static final Map<NameType, Languages> LANGUAGES = new EnumMap<>(NameType.class); +158 +159 static { +160 for (final 
NameType s : NameType.values()) { +161 LANGUAGES.put(s, getInstance(langResourceName(s))); +162 } +163 } +164 +165 public static Languages getInstance(final NameType nameType) { +166 return LANGUAGES.get(nameType); +167 } +168 +169 public static Languages getInstance(final String languagesResourceName) { +170 // read languages list +171 final Set<String> ls = new HashSet<>(); +172 final InputStream langIS = Languages.class.getClassLoader().getResourceAsStream(languagesResourceName); +173 +174 if (langIS == null) { +175 throw new IllegalArgumentException("Unable to resolve required resource: " + languagesResourceName); +176 } +177 +178 try (final Scanner lsScanner = new Scanner(langIS, ResourceConstants.ENCODING)) { +179 boolean inExtendedComment = false; +180 while (lsScanner.hasNextLine()) { +181 final String line = lsScanner.nextLine().trim(); +182 if (inExtendedComment) { +183 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +184 inExtendedComment = false; +185 } +186 } else { +187 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +188 inExtendedComment = true; +189 } else if (line.length() > 0) { +190 ls.add(line); +191 } +192 } +193 } +194 } +195 +196 return new Languages(Collections.unmodifiableSet(ls)); +197 } +198 +199 private static String langResourceName(final NameType nameType) { +200 return String.format("org/apache/commons/codec/language/bm/%s_languages.txt", nameType.getName()); +201 } +202 +203 private final Set<String> languages; +204 +205 /** +206 * No languages at all. 
+207 */ +208 public static final LanguageSet NO_LANGUAGES = new LanguageSet() { +209 @Override +210 public boolean contains(final String language) { +211 return false; +212 } +213 +214 @Override +215 public String getAny() { +216 throw new NoSuchElementException("Can't fetch any language from the empty language set."); +217 } +218 +219 @Override +220 public boolean isEmpty() { +221 return true; +222 } +223 +224 @Override +225 public boolean isSingleton() { +226 return false; +227 } +228 +229 @Override +230 public LanguageSet restrictTo(final LanguageSet other) { +231 return this; +232 } +233 +234 @Override +235 public LanguageSet merge(final LanguageSet other) { +236 return other; +237 } +238 +239 @Override +240 public String toString() { +241 return "NO_LANGUAGES"; +242 } +243 }; +244 +245 /** +246 * Any/all languages. +247 */ +248 public static final LanguageSet ANY_LANGUAGE = new LanguageSet() { +249 @Override +250 public boolean contains(final String language) { +251 return true; +252 } +253 +254 @Override +255 public String getAny() { +256 throw new NoSuchElementException("Can't fetch any language from the any language set."); +257 } +258 +259 @Override +260 public boolean isEmpty() { +261 return false; +262 } +263 +264 @Override +265 public boolean isSingleton() { +266 return false; +267 } +268 +269 @Override +270 public LanguageSet restrictTo(final LanguageSet other) { +271 return other; +272 } +273 +274 @Override +275 public LanguageSet merge(final LanguageSet other) { +276 return other; +277 } +278 +279 @Override +280 public String toString() { +281 return "ANY_LANGUAGE"; +282 } +283 }; +284 +285 private Languages(final Set<String> languages) { +286 this.languages = languages; +287 } +288 +289 public Set<String> getLanguages() { +290 return this.languages; +291 } +292} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
/**
 * The kinds of names the Beider-Morse encoder knows how to handle.
 * <p>
 * {@link #GENERIC} is the right choice unless particular family names are being matched; it should also
 * work reasonably well for non-name words. The remaining types are tuned specifically to family names and
 * may not work well at all for general text.
 *
 * @since 1.6
 * @version $Id$
 */
public enum NameType {

    /** Ashkenazi family names. */
    ASHKENAZI("ash"),

    /** Generic names and words. */
    GENERIC("gen"),

    /** Sephardic family names. */
    SEPHARDIC("sep");

    /** Short code for this name type, as returned by {@link #getName()}. */
    private final String shortName;

    NameType(final String shortName) {
        this.shortName = shortName;
    }

    /**
     * Gets the short version of the name type.
     *
     * @return the NameType short string
     */
    public String getName() {
        return shortName;
    }
}
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.util.ArrayList; +021import java.util.Arrays; +022import java.util.Collections; +023import java.util.EnumMap; +024import java.util.HashSet; +025import java.util.Iterator; +026import java.util.LinkedHashSet; +027import java.util.List; +028import java.util.Locale; +029import java.util.Map; +030import java.util.Set; +031import java.util.TreeMap; +032 +033import org.apache.commons.codec.language.bm.Languages.LanguageSet; +034import org.apache.commons.codec.language.bm.Rule.Phoneme; +035 +036/** +037 * Converts words into potential phonetic representations. +038 * <p> +039 * This is a two-stage process. Firstly, the word is converted into a phonetic representation that takes +040 * into account the likely source language. Next, this phonetic representation is converted into a +041 * pan-European 'average' representation, allowing comparison between different versions of essentially +042 * the same word from different languages. +043 * <p> +044 * This class is intentionally immutable and thread-safe. 
+045 * If you wish to alter the settings for a PhoneticEngine, you +046 * must make a new one with the updated settings. +047 * <p> +048 * Ported from phoneticengine.php +049 * +050 * @since 1.6 +051 * @version $Id$ +052 */ +053public class PhoneticEngine { +054 +055 /** +056 * Utility for manipulating a set of phonemes as they are being built up. Not intended for use outside +057 * this package, and probably not outside the {@link PhoneticEngine} class. +058 * +059 * @since 1.6 +060 */ +061 static final class PhonemeBuilder { +062 +063 /** +064 * An empty builder where all phonemes must come from some set of languages. This will contain a single +065 * phoneme of zero characters. This can then be appended to. This should be the only way to create a new +066 * phoneme from scratch. +067 * +068 * @param languages the set of languages +069 * @return a new, empty phoneme builder +070 */ +071 public static PhonemeBuilder empty(final Languages.LanguageSet languages) { +072 return new PhonemeBuilder(new Rule.Phoneme("", languages)); +073 } +074 +075 private final Set<Rule.Phoneme> phonemes; +076 +077 private PhonemeBuilder(final Rule.Phoneme phoneme) { +078 this.phonemes = new LinkedHashSet<>(); +079 this.phonemes.add(phoneme); +080 } +081 +082 private PhonemeBuilder(final Set<Rule.Phoneme> phonemes) { +083 this.phonemes = phonemes; +084 } +085 +086 /** +087 * Creates a new phoneme builder containing all phonemes in this one extended by <code>str</code>. +088 * +089 * @param str the characters to append to the phonemes +090 */ +091 public void append(final CharSequence str) { +092 for (final Rule.Phoneme ph : this.phonemes) { +093 ph.append(str); +094 } +095 } +096 +097 /** +098 * Applies the given phoneme expression to all phonemes in this phoneme builder. +099 * <p> +100 * This will lengthen phonemes that have compatible language sets to the expression, and drop those that are +101 * incompatible. 
+102 * +103 * @param phonemeExpr the expression to apply +104 * @param maxPhonemes the maximum number of phonemes to build up +105 */ +106 public void apply(final Rule.PhonemeExpr phonemeExpr, final int maxPhonemes) { +107 final Set<Rule.Phoneme> newPhonemes = new LinkedHashSet<>(maxPhonemes); +108 +109 EXPR: for (final Rule.Phoneme left : this.phonemes) { +110 for (final Rule.Phoneme right : phonemeExpr.getPhonemes()) { +111 final LanguageSet languages = left.getLanguages().restrictTo(right.getLanguages()); +112 if (!languages.isEmpty()) { +113 final Rule.Phoneme join = new Phoneme(left, right, languages); +114 if (newPhonemes.size() < maxPhonemes) { +115 newPhonemes.add(join); +116 if (newPhonemes.size() >= maxPhonemes) { +117 break EXPR; +118 } +119 } +120 } +121 } +122 } +123 +124 this.phonemes.clear(); +125 this.phonemes.addAll(newPhonemes); +126 } +127 +128 /** +129 * Gets underlying phoneme set. Please don't mutate. +130 * +131 * @return the phoneme set +132 */ +133 public Set<Rule.Phoneme> getPhonemes() { +134 return this.phonemes; +135 } +136 +137 /** +138 * Stringifies the phoneme set. This produces a single string of the strings of each phoneme, +139 * joined with a pipe. This is explicitly provided in place of toString as it is a potentially +140 * expensive operation, which should be avoided when debugging. +141 * +142 * @return the stringified phoneme set +143 */ +144 public String makeString() { +145 final StringBuilder sb = new StringBuilder(); +146 +147 for (final Rule.Phoneme ph : this.phonemes) { +148 if (sb.length() > 0) { +149 sb.append("|"); +150 } +151 sb.append(ph.getPhonemeText()); +152 } +153 +154 return sb.toString(); +155 } +156 } +157 +158 /** +159 * A function closure capturing the application of a list of rules to an input sequence at a particular offset. +160 * After invocation, the values <code>i</code> and <code>found</code> are updated. 
<code>i</code> points to the +161 * index of the next char in <code>input</code> that must be processed next (the input up to that index having been +162 * processed already), and <code>found</code> indicates if a matching rule was found or not. In the case where a +163 * matching rule was found, <code>phonemeBuilder</code> is replaced with a new builder containing the phonemes +164 * updated by the matching rule. +165 * +166 * Although this class is not thread-safe (it has mutable unprotected fields), it is not shared between threads +167 * as it is constructed as needed by the calling methods. +168 * @since 1.6 +169 */ +170 private static final class RulesApplication { +171 private final Map<String, List<Rule>> finalRules; +172 private final CharSequence input; +173 +174 private final PhonemeBuilder phonemeBuilder; +175 private int i; +176 private final int maxPhonemes; +177 private boolean found; +178 +179 public RulesApplication(final Map<String, List<Rule>> finalRules, final CharSequence input, +180 final PhonemeBuilder phonemeBuilder, final int i, final int maxPhonemes) { +181 if (finalRules == null) { +182 throw new NullPointerException("The finalRules argument must not be null"); +183 } +184 this.finalRules = finalRules; +185 this.phonemeBuilder = phonemeBuilder; +186 this.input = input; +187 this.i = i; +188 this.maxPhonemes = maxPhonemes; +189 } +190 +191 public int getI() { +192 return this.i; +193 } +194 +195 public PhonemeBuilder getPhonemeBuilder() { +196 return this.phonemeBuilder; +197 } +198 +199 /** +200 * Invokes the rules. Loops over the rules list, stopping at the first one that has a matching context +201 * and pattern. Then applies this rule to the phoneme builder to produce updated phonemes. If there was no +202 * match, <code>i</code> is advanced one and the character is silently dropped from the phonetic spelling. 
+203 * +204 * @return <code>this</code> +205 */ +206 public RulesApplication invoke() { +207 this.found = false; +208 int patternLength = 1; +209 final List<Rule> rules = this.finalRules.get(input.subSequence(i, i+patternLength)); +210 if (rules != null) { +211 for (final Rule rule : rules) { +212 final String pattern = rule.getPattern(); +213 patternLength = pattern.length(); +214 if (rule.patternAndContextMatches(this.input, this.i)) { +215 this.phonemeBuilder.apply(rule.getPhoneme(), maxPhonemes); +216 this.found = true; +217 break; +218 } +219 } +220 } +221 +222 if (!this.found) { +223 patternLength = 1; +224 } +225 +226 this.i += patternLength; +227 return this; +228 } +229 +230 public boolean isFound() { +231 return this.found; +232 } +233 } +234 +235 private static final Map<NameType, Set<String>> NAME_PREFIXES = new EnumMap<>(NameType.class); +236 +237 static { +238 NAME_PREFIXES.put(NameType.ASHKENAZI, +239 Collections.unmodifiableSet( +240 new HashSet<>(Arrays.asList("bar", "ben", "da", "de", "van", "von")))); +241 NAME_PREFIXES.put(NameType.SEPHARDIC, +242 Collections.unmodifiableSet( +243 new HashSet<>(Arrays.asList("al", "el", "da", "dal", "de", "del", "dela", "de la", +244 "della", "des", "di", "do", "dos", "du", "van", "von")))); +245 NAME_PREFIXES.put(NameType.GENERIC, +246 Collections.unmodifiableSet( +247 new HashSet<>(Arrays.asList("da", "dal", "de", "del", "dela", "de la", "della", +248 "des", "di", "do", "dos", "du", "van", "von")))); +249 } +250 +251 /** +252 * Joins some strings with an internal separator. 
+253 * @param strings Strings to join +254 * @param sep String to separate them with +255 * @return a single String consisting of each element of <code>strings</code> interleaved by <code>sep</code> +256 */ +257 private static String join(final Iterable<String> strings, final String sep) { +258 final StringBuilder sb = new StringBuilder(); +259 final Iterator<String> si = strings.iterator(); +260 if (si.hasNext()) { +261 sb.append(si.next()); +262 } +263 while (si.hasNext()) { +264 sb.append(sep).append(si.next()); +265 } +266 +267 return sb.toString(); +268 } +269 +270 private static final int DEFAULT_MAX_PHONEMES = 20; +271 +272 private final Lang lang; +273 +274 private final NameType nameType; +275 +276 private final RuleType ruleType; +277 +278 private final boolean concat; +279 +280 private final int maxPhonemes; +281 +282 /** +283 * Generates a new, fully-configured phonetic engine. +284 * +285 * @param nameType +286 * the type of names it will use +287 * @param ruleType +288 * the type of rules it will apply +289 * @param concat +290 * if it will concatenate multiple encodings +291 */ +292 public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat) { +293 this(nameType, ruleType, concat, DEFAULT_MAX_PHONEMES); +294 } +295 +296 /** +297 * Generates a new, fully-configured phonetic engine. 
+298 * +299 * @param nameType +300 * the type of names it will use +301 * @param ruleType +302 * the type of rules it will apply +303 * @param concat +304 * if it will concatenate multiple encodings +305 * @param maxPhonemes +306 * the maximum number of phonemes that will be handled +307 * @since 1.7 +308 */ +309 public PhoneticEngine(final NameType nameType, final RuleType ruleType, final boolean concat, +310 final int maxPhonemes) { +311 if (ruleType == RuleType.RULES) { +312 throw new IllegalArgumentException("ruleType must not be " + RuleType.RULES); +313 } +314 this.nameType = nameType; +315 this.ruleType = ruleType; +316 this.concat = concat; +317 this.lang = Lang.instance(nameType); +318 this.maxPhonemes = maxPhonemes; +319 } +320 +321 /** +322 * Applies the final rules to convert from a language-specific phonetic representation to a +323 * language-independent representation. +324 * +325 * @param phonemeBuilder the current phonemes +326 * @param finalRules the final rules to apply +327 * @return the resulting phonemes +328 */ +329 private PhonemeBuilder applyFinalRules(final PhonemeBuilder phonemeBuilder, +330 final Map<String, List<Rule>> finalRules) { +331 if (finalRules == null) { +332 throw new NullPointerException("finalRules can not be null"); +333 } +334 if (finalRules.isEmpty()) { +335 return phonemeBuilder; +336 } +337 +338 final Map<Rule.Phoneme, Rule.Phoneme> phonemes = +339 new TreeMap<>(Rule.Phoneme.COMPARATOR); +340 +341 for (final Rule.Phoneme phoneme : phonemeBuilder.getPhonemes()) { +342 PhonemeBuilder subBuilder = PhonemeBuilder.empty(phoneme.getLanguages()); +343 final String phonemeText = phoneme.getPhonemeText().toString(); +344 +345 for (int i = 0; i < phonemeText.length();) { +346 final RulesApplication rulesApplication = +347 new RulesApplication(finalRules, phonemeText, subBuilder, i, maxPhonemes).invoke(); +348 final boolean found = rulesApplication.isFound(); +349 subBuilder = rulesApplication.getPhonemeBuilder(); +350 +351 if 
(!found) { +352 // not found, appending as-is +353 subBuilder.append(phonemeText.subSequence(i, i + 1)); +354 } +355 +356 i = rulesApplication.getI(); +357 } +358 +359 // the phonemes map orders the phonemes only based on their text, but ignores the language set +360 // when adding new phonemes, check for equal phonemes and merge their language set, otherwise +361 // phonemes with the same text but different language set get lost +362 for (final Rule.Phoneme newPhoneme : subBuilder.getPhonemes()) { +363 if (phonemes.containsKey(newPhoneme)) { +364 final Rule.Phoneme oldPhoneme = phonemes.remove(newPhoneme); +365 final Rule.Phoneme mergedPhoneme = oldPhoneme.mergeWithLanguage(newPhoneme.getLanguages()); +366 phonemes.put(mergedPhoneme, mergedPhoneme); +367 } else { +368 phonemes.put(newPhoneme, newPhoneme); +369 } +370 } +371 } +372 +373 return new PhonemeBuilder(phonemes.keySet()); +374 } +375 +376 /** +377 * Encodes a string to its phonetic representation. +378 * +379 * @param input +380 * the String to encode +381 * @return the encoding of the input +382 */ +383 public String encode(final String input) { +384 final Languages.LanguageSet languageSet = this.lang.guessLanguages(input); +385 return encode(input, languageSet); +386 } +387 +388 /** +389 * Encodes an input string into an output phonetic representation, given a set of possible origin languages. 
+390 * +391 * @param input +392 * String to phoneticise; a String with dashes or spaces separating each word +393 * @param languageSet +394 * set of possible origin languages +395 * @return a phonetic representation of the input; a String containing '-'-separated phonetic representations of the +396 * input +397 */ +398 public String encode(String input, final Languages.LanguageSet languageSet) { +399 final Map<String, List<Rule>> rules = Rule.getInstanceMap(this.nameType, RuleType.RULES, languageSet); +400 // rules common across many (all) languages +401 final Map<String, List<Rule>> finalRules1 = Rule.getInstanceMap(this.nameType, this.ruleType, "common"); +402 // rules that apply to a specific language that may be ambiguous or wrong if applied to other languages +403 final Map<String, List<Rule>> finalRules2 = Rule.getInstanceMap(this.nameType, this.ruleType, languageSet); +404 +405 // tidy the input +406 // lower case is a locale-dependent operation +407 input = input.toLowerCase(Locale.ENGLISH).replace('-', ' ').trim(); +408 +409 if (this.nameType == NameType.GENERIC) { +410 if (input.length() >= 2 && input.substring(0, 2).equals("d'")) { // check for d' +411 final String remainder = input.substring(2); +412 final String combined = "d" + remainder; +413 return "(" + encode(remainder) + ")-(" + encode(combined) + ")"; +414 } +415 for (final String l : NAME_PREFIXES.get(this.nameType)) { +416 // handle generic prefixes +417 if (input.startsWith(l + " ")) { +418 // check for any prefix in the words list +419 final String remainder = input.substring(l.length() + 1); // input without the prefix +420 final String combined = l + remainder; // input with prefix without space +421 return "(" + encode(remainder) + ")-(" + encode(combined) + ")"; +422 } +423 } +424 } +425 +426 final List<String> words = Arrays.asList(input.split("\\s+")); +427 final List<String> words2 = new ArrayList<>(); +428 +429 // special-case handling of word prefixes based upon the name type +430 
switch (this.nameType) { +431 case SEPHARDIC: +432 for (final String aWord : words) { +433 final String[] parts = aWord.split("'"); +434 final String lastPart = parts[parts.length - 1]; +435 words2.add(lastPart); +436 } +437 words2.removeAll(NAME_PREFIXES.get(this.nameType)); +438 break; +439 case ASHKENAZI: +440 words2.addAll(words); +441 words2.removeAll(NAME_PREFIXES.get(this.nameType)); +442 break; +443 case GENERIC: +444 words2.addAll(words); +445 break; +446 default: +447 throw new IllegalStateException("Unreachable case: " + this.nameType); +448 } +449 +450 if (this.concat) { +451 // concat mode enabled +452 input = join(words2, " "); +453 } else if (words2.size() == 1) { +454 // not a multi-word name +455 input = words.iterator().next(); +456 } else { +457 // encode each word in a multi-word name separately (normally used for approx matches) +458 final StringBuilder result = new StringBuilder(); +459 for (final String word : words2) { +460 result.append("-").append(encode(word)); +461 } +462 // return the result without the leading "-" +463 return result.substring(1); +464 } +465 +466 PhonemeBuilder phonemeBuilder = PhonemeBuilder.empty(languageSet); +467 +468 // loop over each char in the input - we will handle the increment manually +469 for (int i = 0; i < input.length();) { +470 final RulesApplication rulesApplication = +471 new RulesApplication(rules, input, phonemeBuilder, i, maxPhonemes).invoke(); +472 i = rulesApplication.getI(); +473 phonemeBuilder = rulesApplication.getPhonemeBuilder(); +474 } +475 +476 // Apply the general rules +477 phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules1); +478 // Apply the language-specific rules +479 phonemeBuilder = applyFinalRules(phonemeBuilder, finalRules2); +480 +481 return phonemeBuilder.makeString(); +482 } +483 +484 /** +485 * Gets the Lang language guessing rules being used. 
+486 * +487 * @return the Lang in use +488 */ +489 public Lang getLang() { +490 return this.lang; +491 } +492 +493 /** +494 * Gets the NameType being used. +495 * +496 * @return the NameType in use +497 */ +498 public NameType getNameType() { +499 return this.nameType; +500 } +501 +502 /** +503 * Gets the RuleType being used. +504 * +505 * @return the RuleType in use +506 */ +507 public RuleType getRuleType() { +508 return this.ruleType; +509 } +510 +511 /** +512 * Gets if multiple phonetic encodings are concatenated or if just the first one is kept. +513 * +514 * @return true if multiple phonetic encodings are returned, false if just the first is +515 */ +516 public boolean isConcat() { +517 return this.concat; +518 } +519 +520 /** +521 * Gets the maximum number of phonemes the engine will calculate for a given input. +522 * +523 * @return the maximum number of phonemes +524 * @since 1.7 +525 */ +526 public int getMaxPhonemes() { +527 return this.maxPhonemes; +528 } +529} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.ArrayList; +022import java.util.Arrays; +023import java.util.Collections; +024import java.util.Comparator; +025import java.util.EnumMap; +026import java.util.HashMap; +027import java.util.HashSet; +028import java.util.List; +029import java.util.Map; +030import java.util.Scanner; +031import java.util.Set; +032import java.util.regex.Matcher; +033import java.util.regex.Pattern; +034 +035import org.apache.commons.codec.language.bm.Languages.LanguageSet; +036 +037/** +038 * A phoneme rule. +039 * <p> +040 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply +041 * and a logical flag indicating if all languages must be in play. 
A rule matches if: +042 * <ul> +043 * <li>the pattern matches at the current position</li> +044 * <li>the string up until the beginning of the pattern matches the left context</li> +045 * <li>the string from the end of the pattern matches the right context</li> +046 * <li>logical is ALL and all languages are in scope; or</li> +047 * <li>logical is any other value and at least one language is in scope</li> +048 * </ul> +049 * <p> +050 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user +051 * to explicitly construct their own. +052 * <p> +053 * Rules are immutable and thread-safe. +054 * <p> +055 * <b>Rules resources</b> +056 * <p> +057 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically +058 * named following the pattern: +059 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote> +060 * <p> +061 * The format of these resources is the following: +062 * <ul> +063 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these +064 * will be interpreted as: +065 * <ol> +066 * <li>pattern</li> +067 * <li>left context</li> +068 * <li>right context</li> +069 * <li>phoneme</li> +070 * </ol> +071 * </li> +072 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded +073 * as a comment.</li> +074 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
This will skip +075 * all content until a line ending in '*' and '/' is found.</li> +076 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +077 * </ul> +078 * +079 * @since 1.6 +080 * @version $Id$ +081 */ +082public class Rule { +083 +084 public static final class Phoneme implements PhonemeExpr { +085 public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { +086 @Override +087 public int compare(final Phoneme o1, final Phoneme o2) { +088 for (int i = 0; i < o1.phonemeText.length(); i++) { +089 if (i >= o2.phonemeText.length()) { +090 return +1; +091 } +092 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); +093 if (c != 0) { +094 return c; +095 } +096 } +097 +098 if (o1.phonemeText.length() < o2.phonemeText.length()) { +099 return -1; +100 } +101 +102 return 0; +103 } +104 }; +105 +106 private final StringBuilder phonemeText; +107 private final Languages.LanguageSet languages; +108 +109 public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { +110 this.phonemeText = new StringBuilder(phonemeText); +111 this.languages = languages; +112 } +113 +114 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { +115 this(phonemeLeft.phonemeText, phonemeLeft.languages); +116 this.phonemeText.append(phonemeRight.phonemeText); +117 } +118 +119 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { +120 this(phonemeLeft.phonemeText, languages); +121 this.phonemeText.append(phonemeRight.phonemeText); +122 } +123 +124 public Phoneme append(final CharSequence str) { +125 this.phonemeText.append(str); +126 return this; +127 } +128 +129 public Languages.LanguageSet getLanguages() { +130 return this.languages; +131 } +132 +133 @Override +134 public Iterable<Phoneme> getPhonemes() { +135 return Collections.singleton(this); +136 } +137 +138 public CharSequence getPhonemeText() { +139 return this.phonemeText; +140 } +141 +142 /** 
+143 * Deprecated since 1.9. +144 * +145 * @param right the Phoneme to join +146 * @return a new Phoneme +147 * @deprecated since 1.9 +148 */ +149 @Deprecated +150 public Phoneme join(final Phoneme right) { +151 return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), +152 this.languages.restrictTo(right.languages)); +153 } +154 +155 /** +156 * Returns a new Phoneme with the same text but a union of its +157 * current language set and the given one. +158 * +159 * @param lang the language set to merge +160 * @return a new Phoneme +161 */ +162 public Phoneme mergeWithLanguage(final LanguageSet lang) { +163 return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); +164 } +165 +166 @Override +167 public String toString() { +168 return phonemeText.toString() + "[" + languages + "]"; +169 } +170 } +171 +172 public interface PhonemeExpr { +173 Iterable<Phoneme> getPhonemes(); +174 } +175 +176 public static final class PhonemeList implements PhonemeExpr { +177 private final List<Phoneme> phonemes; +178 +179 public PhonemeList(final List<Phoneme> phonemes) { +180 this.phonemes = phonemes; +181 } +182 +183 @Override +184 public List<Phoneme> getPhonemes() { +185 return this.phonemes; +186 } +187 } +188 +189 /** +190 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. 
+191 */ +192 public interface RPattern { +193 boolean isMatch(CharSequence input); +194 } +195 +196 public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { +197 @Override +198 public boolean isMatch(final CharSequence input) { +199 return true; +200 } +201 }; +202 +203 public static final String ALL = "ALL"; +204 +205 private static final String DOUBLE_QUOTE = "\""; +206 +207 private static final String HASH_INCLUDE = "#include"; +208 +209 private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES = +210 new EnumMap<>(NameType.class); +211 +212 static { +213 for (final NameType s : NameType.values()) { +214 final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts = +215 new EnumMap<>(RuleType.class); +216 +217 for (final RuleType rt : RuleType.values()) { +218 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>(); +219 +220 final Languages ls = Languages.getInstance(s); +221 for (final String l : ls.getLanguages()) { +222 try (final Scanner scanner = createScanner(s, rt, l)) { +223 rs.put(l, parseRules(scanner, createResourceName(s, rt, l))); +224 } catch (final IllegalStateException e) { +225 throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); +226 } +227 } +228 if (!rt.equals(RuleType.RULES)) { +229 try (final Scanner scanner = createScanner(s, rt, "common")) { +230 rs.put("common", parseRules(scanner, createResourceName(s, rt, "common"))); +231 } +232 } +233 +234 rts.put(rt, Collections.unmodifiableMap(rs)); +235 } +236 +237 RULES.put(s, Collections.unmodifiableMap(rts)); +238 } +239 } +240 +241 private static boolean contains(final CharSequence chars, final char input) { +242 for (int i = 0; i < chars.length(); i++) { +243 if (chars.charAt(i) == input) { +244 return true; +245 } +246 } +247 return false; +248 } +249 +250 private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { +251 return 
String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", +252 nameType.getName(), rt.getName(), lang); +253 } +254 +255 private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { +256 final String resName = createResourceName(nameType, rt, lang); +257 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +258 +259 if (rulesIS == null) { +260 throw new IllegalArgumentException("Unable to load resource: " + resName); +261 } +262 +263 return new Scanner(rulesIS, ResourceConstants.ENCODING); +264 } +265 +266 private static Scanner createScanner(final String lang) { +267 final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); +268 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +269 +270 if (rulesIS == null) { +271 throw new IllegalArgumentException("Unable to load resource: " + resName); +272 } +273 +274 return new Scanner(rulesIS, ResourceConstants.ENCODING); +275 } +276 +277 private static boolean endsWith(final CharSequence input, final CharSequence suffix) { +278 if (suffix.length() > input.length()) { +279 return false; +280 } +281 for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { +282 if (input.charAt(i) != suffix.charAt(j)) { +283 return false; +284 } +285 } +286 return true; +287 } +288 +289 /** +290 * Gets rules for a combination of name type, rule type and languages. 
+291 * +292 * @param nameType +293 * the NameType to consider +294 * @param rt +295 * the RuleType to consider +296 * @param langs +297 * the set of languages to consider +298 * @return a list of Rules that apply +299 */ +300 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, +301 final Languages.LanguageSet langs) { +302 final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); +303 final List<Rule> allRules = new ArrayList<>(); +304 for (final List<Rule> rules : ruleMap.values()) { +305 allRules.addAll(rules); +306 } +307 return allRules; +308 } +309 +310 /** +311 * Gets rules for a combination of name type, rule type and a single language. +312 * +313 * @param nameType +314 * the NameType to consider +315 * @param rt +316 * the RuleType to consider +317 * @param lang +318 * the language to consider +319 * @return a list of Rules that apply +320 */ +321 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { +322 return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); +323 } +324 +325 /** +326 * Gets rules for a combination of name type, rule type and languages. +327 * +328 * @param nameType +329 * the NameType to consider +330 * @param rt +331 * the RuleType to consider +332 * @param langs +333 * the set of languages to consider +334 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +335 * @since 1.9 +336 */ +337 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +338 final Languages.LanguageSet langs) { +339 return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : +340 getInstanceMap(nameType, rt, Languages.ANY); +341 } +342 +343 /** +344 * Gets rules for a combination of name type, rule type and a single language. 
+345 * +346 * @param nameType +347 * the NameType to consider +348 * @param rt +349 * the RuleType to consider +350 * @param lang +351 * the language to consider +352 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +353 * @since 1.9 +354 */ +355 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +356 final String lang) { +357 final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); +358 +359 if (rules == null) { +360 throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", +361 nameType.getName(), rt.getName(), lang)); +362 } +363 +364 return rules; +365 } +366 +367 private static Phoneme parsePhoneme(final String ph) { +368 final int open = ph.indexOf("["); +369 if (open >= 0) { +370 if (!ph.endsWith("]")) { +371 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); +372 } +373 final String before = ph.substring(0, open); +374 final String in = ph.substring(open + 1, ph.length() - 1); +375 final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]"))); +376 +377 return new Phoneme(before, Languages.LanguageSet.from(langs)); +378 } +379 return new Phoneme(ph, Languages.ANY_LANGUAGE); +380 } +381 +382 private static PhonemeExpr parsePhonemeExpr(final String ph) { +383 if (ph.startsWith("(")) { // we have a bracketed list of options +384 if (!ph.endsWith(")")) { +385 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); +386 } +387 +388 final List<Phoneme> phs = new ArrayList<>(); +389 final String body = ph.substring(1, ph.length() - 1); +390 for (final String part : body.split("[|]")) { +391 phs.add(parsePhoneme(part)); +392 } +393 if (body.startsWith("|") || body.endsWith("|")) { +394 phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); +395 } +396 +397 return new PhonemeList(phs); +398 } +399 return parsePhoneme(ph); +400 } +401 +402 private 
static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { +403 final Map<String, List<Rule>> lines = new HashMap<>(); +404 int currentLine = 0; +405 +406 boolean inMultilineComment = false; +407 while (scanner.hasNextLine()) { +408 currentLine++; +409 final String rawLine = scanner.nextLine(); +410 String line = rawLine; +411 +412 if (inMultilineComment) { +413 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +414 inMultilineComment = false; +415 } +416 } else { +417 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +418 inMultilineComment = true; +419 } else { +420 // discard comments +421 final int cmtI = line.indexOf(ResourceConstants.CMT); +422 if (cmtI >= 0) { +423 line = line.substring(0, cmtI); +424 } +425 +426 // trim leading-trailing whitespace +427 line = line.trim(); +428 +429 if (line.length() == 0) { +430 continue; // empty lines can be safely skipped +431 } +432 +433 if (line.startsWith(HASH_INCLUDE)) { +434 // include statement +435 final String incl = line.substring(HASH_INCLUDE.length()).trim(); +436 if (incl.contains(" ")) { +437 throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + +438 location); +439 } +440 try (final Scanner hashIncludeScanner = createScanner(incl)) { +441 lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); +442 } +443 } else { +444 // rule +445 final String[] parts = line.split("\\s+"); +446 if (parts.length != 4) { +447 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +448 " parts: " + rawLine + " in " + location); +449 } +450 try { +451 final String pat = stripQuotes(parts[0]); +452 final String lCon = stripQuotes(parts[1]); +453 final String rCon = stripQuotes(parts[2]); +454 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); +455 final int cLine = currentLine; +456 final Rule r = new Rule(pat, lCon, rCon, ph) { +457 private final int myLine = cLine; +458 private final String 
loc = location; +459 +460 @Override +461 public String toString() { +462 final StringBuilder sb = new StringBuilder(); +463 sb.append("Rule"); +464 sb.append("{line=").append(myLine); +465 sb.append(", loc='").append(loc).append('\''); +466 sb.append(", pat='").append(pat).append('\''); +467 sb.append(", lcon='").append(lCon).append('\''); +468 sb.append(", rcon='").append(rCon).append('\''); +469 sb.append('}'); +470 return sb.toString(); +471 } +472 }; +473 final String patternKey = r.pattern.substring(0,1); +474 List<Rule> rules = lines.get(patternKey); +475 if (rules == null) { +476 rules = new ArrayList<>(); +477 lines.put(patternKey, rules); +478 } +479 rules.add(r); +480 } catch (final IllegalArgumentException e) { +481 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + +482 location, e); +483 } +484 } +485 } +486 } +487 } +488 +489 return lines; +490 } +491 +492 /** +493 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. +494 * +495 * @param regex +496 * the regular expression to compile +497 * @return an RPattern that will match this regex +498 */ +499 private static RPattern pattern(final String regex) { +500 final boolean startsWith = regex.startsWith("^"); +501 final boolean endsWith = regex.endsWith("$"); +502 final String content = regex.substring(startsWith ? 1 : 0, endsWith ? 
regex.length() - 1 : regex.length()); +503 final boolean boxes = content.contains("["); +504 +505 if (!boxes) { +506 if (startsWith && endsWith) { +507 // exact match +508 if (content.length() == 0) { +509 // empty +510 return new RPattern() { +511 @Override +512 public boolean isMatch(final CharSequence input) { +513 return input.length() == 0; +514 } +515 }; +516 } +517 return new RPattern() { +518 @Override +519 public boolean isMatch(final CharSequence input) { +520 return input.equals(content); +521 } +522 }; +523 } else if ((startsWith || endsWith) && content.length() == 0) { +524 // matches every string +525 return ALL_STRINGS_RMATCHER; +526 } else if (startsWith) { +527 // matches from start +528 return new RPattern() { +529 @Override +530 public boolean isMatch(final CharSequence input) { +531 return startsWith(input, content); +532 } +533 }; +534 } else if (endsWith) { +535 // matches from start +536 return new RPattern() { +537 @Override +538 public boolean isMatch(final CharSequence input) { +539 return endsWith(input, content); +540 } +541 }; +542 } +543 } else { +544 final boolean startsWithBox = content.startsWith("["); +545 final boolean endsWithBox = content.endsWith("]"); +546 +547 if (startsWithBox && endsWithBox) { +548 String boxContent = content.substring(1, content.length() - 1); +549 if (!boxContent.contains("[")) { +550 // box containing alternatives +551 final boolean negate = boxContent.startsWith("^"); +552 if (negate) { +553 boxContent = boxContent.substring(1); +554 } +555 final String bContent = boxContent; +556 final boolean shouldMatch = !negate; +557 +558 if (startsWith && endsWith) { +559 // exact match +560 return new RPattern() { +561 @Override +562 public boolean isMatch(final CharSequence input) { +563 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; +564 } +565 }; +566 } else if (startsWith) { +567 // first char +568 return new RPattern() { +569 @Override +570 public boolean isMatch(final 
CharSequence input) { +571 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; +572 } +573 }; +574 } else if (endsWith) { +575 // last char +576 return new RPattern() { +577 @Override +578 public boolean isMatch(final CharSequence input) { +579 return input.length() > 0 && +580 contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; +581 } +582 }; +583 } +584 } +585 } +586 } +587 +588 return new RPattern() { +589 Pattern pattern = Pattern.compile(regex); +590 +591 @Override +592 public boolean isMatch(final CharSequence input) { +593 final Matcher matcher = pattern.matcher(input); +594 return matcher.find(); +595 } +596 }; +597 } +598 +599 private static boolean startsWith(final CharSequence input, final CharSequence prefix) { +600 if (prefix.length() > input.length()) { +601 return false; +602 } +603 for (int i = 0; i < prefix.length(); i++) { +604 if (input.charAt(i) != prefix.charAt(i)) { +605 return false; +606 } +607 } +608 return true; +609 } +610 +611 private static String stripQuotes(String str) { +612 if (str.startsWith(DOUBLE_QUOTE)) { +613 str = str.substring(1); +614 } +615 +616 if (str.endsWith(DOUBLE_QUOTE)) { +617 str = str.substring(0, str.length() - 1); +618 } +619 +620 return str; +621 } +622 +623 private final RPattern lContext; +624 +625 private final String pattern; +626 +627 private final PhonemeExpr phoneme; +628 +629 private final RPattern rContext; +630 +631 /** +632 * Creates a new rule. +633 * +634 * @param pattern +635 * the pattern +636 * @param lContext +637 * the left context +638 * @param rContext +639 * the right context +640 * @param phoneme +641 * the resulting phoneme +642 */ +643 public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { +644 this.pattern = pattern; +645 this.lContext = pattern(lContext + "$"); +646 this.rContext = pattern("^" + rContext); +647 this.phoneme = phoneme; +648 } +649 +650 /** +651 * Gets the left context. 
This is a regular expression that must match to the left of the pattern. +652 * +653 * @return the left context Pattern +654 */ +655 public RPattern getLContext() { +656 return this.lContext; +657 } +658 +659 /** +660 * Gets the pattern. This is a string-literal that must exactly match. +661 * +662 * @return the pattern +663 */ +664 public String getPattern() { +665 return this.pattern; +666 } +667 +668 /** +669 * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. +670 * +671 * @return the phoneme +672 */ +673 public PhonemeExpr getPhoneme() { +674 return this.phoneme; +675 } +676 +677 /** +678 * Gets the right context. This is a regular expression that must match to the right of the pattern. +679 * +680 * @return the right context Pattern +681 */ +682 public RPattern getRContext() { +683 return this.rContext; +684 } +685 +686 /** +687 * Decides if the pattern and context match the input starting at a position. It is a match if the +688 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and +689 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. 
+690 * +691 * @param input +692 * the input String +693 * @param i +694 * the int position within the input +695 * @return true if the pattern and left/right context match, false otherwise +696 */ +697 public boolean patternAndContextMatches(final CharSequence input, final int i) { +698 if (i < 0) { +699 throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); +700 } +701 +702 final int patternLength = this.pattern.length(); +703 final int ipl = i + patternLength; +704 +705 if (ipl > input.length()) { +706 // not enough room for the pattern to match +707 return false; +708 } +709 +710 // evaluate the pattern, left context and right context +711 // fail early if any of the evaluations is not successful +712 if (!input.subSequence(i, ipl).equals(this.pattern)) { +713 return false; +714 } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { +715 return false; +716 } +717 return this.lContext.isMatch(input.subSequence(0, i)); +718 } +719} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.ArrayList; +022import java.util.Arrays; +023import java.util.Collections; +024import java.util.Comparator; +025import java.util.EnumMap; +026import java.util.HashMap; +027import java.util.HashSet; +028import java.util.List; +029import java.util.Map; +030import java.util.Scanner; +031import java.util.Set; +032import java.util.regex.Matcher; +033import java.util.regex.Pattern; +034 +035import org.apache.commons.codec.language.bm.Languages.LanguageSet; +036 +037/** +038 * A phoneme rule. +039 * <p> +040 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply +041 * and a logical flag indicating if all languages must be in play. 
A rule matches if: +042 * <ul> +043 * <li>the pattern matches at the current position</li> +044 * <li>the string up until the beginning of the pattern matches the left context</li> +045 * <li>the string from the end of the pattern matches the right context</li> +046 * <li>logical is ALL and all languages are in scope; or</li> +047 * <li>logical is any other value and at least one language is in scope</li> +048 * </ul> +049 * <p> +050 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user +051 * to explicitly construct their own. +052 * <p> +053 * Rules are immutable and thread-safe. +054 * <p> +055 * <b>Rules resources</b> +056 * <p> +057 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically +058 * named following the pattern: +059 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote> +060 * <p> +061 * The format of these resources is the following: +062 * <ul> +063 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these +064 * will be interpreted as: +065 * <ol> +066 * <li>pattern</li> +067 * <li>left context</li> +068 * <li>right context</li> +069 * <li>phoneme</li> +070 * </ol> +071 * </li> +072 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded +073 * as a comment.</li> +074 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
This will skip +075 * all content until a line ending in '*' and '/' is found.</li> +076 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +077 * </ul> +078 * +079 * @since 1.6 +080 * @version $Id$ +081 */ +082public class Rule { +083 +084 public static final class Phoneme implements PhonemeExpr { +085 public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { +086 @Override +087 public int compare(final Phoneme o1, final Phoneme o2) { +088 for (int i = 0; i < o1.phonemeText.length(); i++) { +089 if (i >= o2.phonemeText.length()) { +090 return +1; +091 } +092 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); +093 if (c != 0) { +094 return c; +095 } +096 } +097 +098 if (o1.phonemeText.length() < o2.phonemeText.length()) { +099 return -1; +100 } +101 +102 return 0; +103 } +104 }; +105 +106 private final StringBuilder phonemeText; +107 private final Languages.LanguageSet languages; +108 +109 public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { +110 this.phonemeText = new StringBuilder(phonemeText); +111 this.languages = languages; +112 } +113 +114 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { +115 this(phonemeLeft.phonemeText, phonemeLeft.languages); +116 this.phonemeText.append(phonemeRight.phonemeText); +117 } +118 +119 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { +120 this(phonemeLeft.phonemeText, languages); +121 this.phonemeText.append(phonemeRight.phonemeText); +122 } +123 +124 public Phoneme append(final CharSequence str) { +125 this.phonemeText.append(str); +126 return this; +127 } +128 +129 public Languages.LanguageSet getLanguages() { +130 return this.languages; +131 } +132 +133 @Override +134 public Iterable<Phoneme> getPhonemes() { +135 return Collections.singleton(this); +136 } +137 +138 public CharSequence getPhonemeText() { +139 return this.phonemeText; +140 } +141 +142 /** 
+143 * Deprecated since 1.9. +144 * +145 * @param right the Phoneme to join +146 * @return a new Phoneme +147 * @deprecated since 1.9 +148 */ +149 @Deprecated +150 public Phoneme join(final Phoneme right) { +151 return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), +152 this.languages.restrictTo(right.languages)); +153 } +154 +155 /** +156 * Returns a new Phoneme with the same text but a union of its +157 * current language set and the given one. +158 * +159 * @param lang the language set to merge +160 * @return a new Phoneme +161 */ +162 public Phoneme mergeWithLanguage(final LanguageSet lang) { +163 return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); +164 } +165 +166 @Override +167 public String toString() { +168 return phonemeText.toString() + "[" + languages + "]"; +169 } +170 } +171 +172 public interface PhonemeExpr { +173 Iterable<Phoneme> getPhonemes(); +174 } +175 +176 public static final class PhonemeList implements PhonemeExpr { +177 private final List<Phoneme> phonemes; +178 +179 public PhonemeList(final List<Phoneme> phonemes) { +180 this.phonemes = phonemes; +181 } +182 +183 @Override +184 public List<Phoneme> getPhonemes() { +185 return this.phonemes; +186 } +187 } +188 +189 /** +190 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. 
+191 */ +192 public interface RPattern { +193 boolean isMatch(CharSequence input); +194 } +195 +196 public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { +197 @Override +198 public boolean isMatch(final CharSequence input) { +199 return true; +200 } +201 }; +202 +203 public static final String ALL = "ALL"; +204 +205 private static final String DOUBLE_QUOTE = "\""; +206 +207 private static final String HASH_INCLUDE = "#include"; +208 +209 private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES = +210 new EnumMap<>(NameType.class); +211 +212 static { +213 for (final NameType s : NameType.values()) { +214 final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts = +215 new EnumMap<>(RuleType.class); +216 +217 for (final RuleType rt : RuleType.values()) { +218 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>(); +219 +220 final Languages ls = Languages.getInstance(s); +221 for (final String l : ls.getLanguages()) { +222 try (final Scanner scanner = createScanner(s, rt, l)) { +223 rs.put(l, parseRules(scanner, createResourceName(s, rt, l))); +224 } catch (final IllegalStateException e) { +225 throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); +226 } +227 } +228 if (!rt.equals(RuleType.RULES)) { +229 try (final Scanner scanner = createScanner(s, rt, "common")) { +230 rs.put("common", parseRules(scanner, createResourceName(s, rt, "common"))); +231 } +232 } +233 +234 rts.put(rt, Collections.unmodifiableMap(rs)); +235 } +236 +237 RULES.put(s, Collections.unmodifiableMap(rts)); +238 } +239 } +240 +241 private static boolean contains(final CharSequence chars, final char input) { +242 for (int i = 0; i < chars.length(); i++) { +243 if (chars.charAt(i) == input) { +244 return true; +245 } +246 } +247 return false; +248 } +249 +250 private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { +251 return 
String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", +252 nameType.getName(), rt.getName(), lang); +253 } +254 +255 private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { +256 final String resName = createResourceName(nameType, rt, lang); +257 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +258 +259 if (rulesIS == null) { +260 throw new IllegalArgumentException("Unable to load resource: " + resName); +261 } +262 +263 return new Scanner(rulesIS, ResourceConstants.ENCODING); +264 } +265 +266 private static Scanner createScanner(final String lang) { +267 final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); +268 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +269 +270 if (rulesIS == null) { +271 throw new IllegalArgumentException("Unable to load resource: " + resName); +272 } +273 +274 return new Scanner(rulesIS, ResourceConstants.ENCODING); +275 } +276 +277 private static boolean endsWith(final CharSequence input, final CharSequence suffix) { +278 if (suffix.length() > input.length()) { +279 return false; +280 } +281 for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { +282 if (input.charAt(i) != suffix.charAt(j)) { +283 return false; +284 } +285 } +286 return true; +287 } +288 +289 /** +290 * Gets rules for a combination of name type, rule type and languages. 
+291 * +292 * @param nameType +293 * the NameType to consider +294 * @param rt +295 * the RuleType to consider +296 * @param langs +297 * the set of languages to consider +298 * @return a list of Rules that apply +299 */ +300 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, +301 final Languages.LanguageSet langs) { +302 final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); +303 final List<Rule> allRules = new ArrayList<>(); +304 for (final List<Rule> rules : ruleMap.values()) { +305 allRules.addAll(rules); +306 } +307 return allRules; +308 } +309 +310 /** +311 * Gets rules for a combination of name type, rule type and a single language. +312 * +313 * @param nameType +314 * the NameType to consider +315 * @param rt +316 * the RuleType to consider +317 * @param lang +318 * the language to consider +319 * @return a list of Rules that apply +320 */ +321 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { +322 return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); +323 } +324 +325 /** +326 * Gets rules for a combination of name type, rule type and languages. +327 * +328 * @param nameType +329 * the NameType to consider +330 * @param rt +331 * the RuleType to consider +332 * @param langs +333 * the set of languages to consider +334 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +335 * @since 1.9 +336 */ +337 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +338 final Languages.LanguageSet langs) { +339 return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : +340 getInstanceMap(nameType, rt, Languages.ANY); +341 } +342 +343 /** +344 * Gets rules for a combination of name type, rule type and a single language. 
+345 * +346 * @param nameType +347 * the NameType to consider +348 * @param rt +349 * the RuleType to consider +350 * @param lang +351 * the language to consider +352 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +353 * @since 1.9 +354 */ +355 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +356 final String lang) { +357 final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); +358 +359 if (rules == null) { +360 throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", +361 nameType.getName(), rt.getName(), lang)); +362 } +363 +364 return rules; +365 } +366 +367 private static Phoneme parsePhoneme(final String ph) { +368 final int open = ph.indexOf("["); +369 if (open >= 0) { +370 if (!ph.endsWith("]")) { +371 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); +372 } +373 final String before = ph.substring(0, open); +374 final String in = ph.substring(open + 1, ph.length() - 1); +375 final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]"))); +376 +377 return new Phoneme(before, Languages.LanguageSet.from(langs)); +378 } +379 return new Phoneme(ph, Languages.ANY_LANGUAGE); +380 } +381 +382 private static PhonemeExpr parsePhonemeExpr(final String ph) { +383 if (ph.startsWith("(")) { // we have a bracketed list of options +384 if (!ph.endsWith(")")) { +385 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); +386 } +387 +388 final List<Phoneme> phs = new ArrayList<>(); +389 final String body = ph.substring(1, ph.length() - 1); +390 for (final String part : body.split("[|]")) { +391 phs.add(parsePhoneme(part)); +392 } +393 if (body.startsWith("|") || body.endsWith("|")) { +394 phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); +395 } +396 +397 return new PhonemeList(phs); +398 } +399 return parsePhoneme(ph); +400 } +401 +402 private 
static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { +403 final Map<String, List<Rule>> lines = new HashMap<>(); +404 int currentLine = 0; +405 +406 boolean inMultilineComment = false; +407 while (scanner.hasNextLine()) { +408 currentLine++; +409 final String rawLine = scanner.nextLine(); +410 String line = rawLine; +411 +412 if (inMultilineComment) { +413 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +414 inMultilineComment = false; +415 } +416 } else { +417 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +418 inMultilineComment = true; +419 } else { +420 // discard comments +421 final int cmtI = line.indexOf(ResourceConstants.CMT); +422 if (cmtI >= 0) { +423 line = line.substring(0, cmtI); +424 } +425 +426 // trim leading-trailing whitespace +427 line = line.trim(); +428 +429 if (line.length() == 0) { +430 continue; // empty lines can be safely skipped +431 } +432 +433 if (line.startsWith(HASH_INCLUDE)) { +434 // include statement +435 final String incl = line.substring(HASH_INCLUDE.length()).trim(); +436 if (incl.contains(" ")) { +437 throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + +438 location); +439 } +440 try (final Scanner hashIncludeScanner = createScanner(incl)) { +441 lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); +442 } +443 } else { +444 // rule +445 final String[] parts = line.split("\\s+"); +446 if (parts.length != 4) { +447 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +448 " parts: " + rawLine + " in " + location); +449 } +450 try { +451 final String pat = stripQuotes(parts[0]); +452 final String lCon = stripQuotes(parts[1]); +453 final String rCon = stripQuotes(parts[2]); +454 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); +455 final int cLine = currentLine; +456 final Rule r = new Rule(pat, lCon, rCon, ph) { +457 private final int myLine = cLine; +458 private final String 
loc = location; +459 +460 @Override +461 public String toString() { +462 final StringBuilder sb = new StringBuilder(); +463 sb.append("Rule"); +464 sb.append("{line=").append(myLine); +465 sb.append(", loc='").append(loc).append('\''); +466 sb.append(", pat='").append(pat).append('\''); +467 sb.append(", lcon='").append(lCon).append('\''); +468 sb.append(", rcon='").append(rCon).append('\''); +469 sb.append('}'); +470 return sb.toString(); +471 } +472 }; +473 final String patternKey = r.pattern.substring(0,1); +474 List<Rule> rules = lines.get(patternKey); +475 if (rules == null) { +476 rules = new ArrayList<>(); +477 lines.put(patternKey, rules); +478 } +479 rules.add(r); +480 } catch (final IllegalArgumentException e) { +481 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + +482 location, e); +483 } +484 } +485 } +486 } +487 } +488 +489 return lines; +490 } +491 +492 /** +493 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. +494 * +495 * @param regex +496 * the regular expression to compile +497 * @return an RPattern that will match this regex +498 */ +499 private static RPattern pattern(final String regex) { +500 final boolean startsWith = regex.startsWith("^"); +501 final boolean endsWith = regex.endsWith("$"); +502 final String content = regex.substring(startsWith ? 1 : 0, endsWith ? 
regex.length() - 1 : regex.length()); +503 final boolean boxes = content.contains("["); +504 +505 if (!boxes) { +506 if (startsWith && endsWith) { +507 // exact match +508 if (content.length() == 0) { +509 // empty +510 return new RPattern() { +511 @Override +512 public boolean isMatch(final CharSequence input) { +513 return input.length() == 0; +514 } +515 }; +516 } +517 return new RPattern() { +518 @Override +519 public boolean isMatch(final CharSequence input) { +520 return input.equals(content); +521 } +522 }; +523 } else if ((startsWith || endsWith) && content.length() == 0) { +524 // matches every string +525 return ALL_STRINGS_RMATCHER; +526 } else if (startsWith) { +527 // matches from start +528 return new RPattern() { +529 @Override +530 public boolean isMatch(final CharSequence input) { +531 return startsWith(input, content); +532 } +533 }; +534 } else if (endsWith) { +535 // matches from start +536 return new RPattern() { +537 @Override +538 public boolean isMatch(final CharSequence input) { +539 return endsWith(input, content); +540 } +541 }; +542 } +543 } else { +544 final boolean startsWithBox = content.startsWith("["); +545 final boolean endsWithBox = content.endsWith("]"); +546 +547 if (startsWithBox && endsWithBox) { +548 String boxContent = content.substring(1, content.length() - 1); +549 if (!boxContent.contains("[")) { +550 // box containing alternatives +551 final boolean negate = boxContent.startsWith("^"); +552 if (negate) { +553 boxContent = boxContent.substring(1); +554 } +555 final String bContent = boxContent; +556 final boolean shouldMatch = !negate; +557 +558 if (startsWith && endsWith) { +559 // exact match +560 return new RPattern() { +561 @Override +562 public boolean isMatch(final CharSequence input) { +563 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; +564 } +565 }; +566 } else if (startsWith) { +567 // first char +568 return new RPattern() { +569 @Override +570 public boolean isMatch(final 
CharSequence input) { +571 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; +572 } +573 }; +574 } else if (endsWith) { +575 // last char +576 return new RPattern() { +577 @Override +578 public boolean isMatch(final CharSequence input) { +579 return input.length() > 0 && +580 contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; +581 } +582 }; +583 } +584 } +585 } +586 } +587 +588 return new RPattern() { +589 Pattern pattern = Pattern.compile(regex); +590 +591 @Override +592 public boolean isMatch(final CharSequence input) { +593 final Matcher matcher = pattern.matcher(input); +594 return matcher.find(); +595 } +596 }; +597 } +598 +599 private static boolean startsWith(final CharSequence input, final CharSequence prefix) { +600 if (prefix.length() > input.length()) { +601 return false; +602 } +603 for (int i = 0; i < prefix.length(); i++) { +604 if (input.charAt(i) != prefix.charAt(i)) { +605 return false; +606 } +607 } +608 return true; +609 } +610 +611 private static String stripQuotes(String str) { +612 if (str.startsWith(DOUBLE_QUOTE)) { +613 str = str.substring(1); +614 } +615 +616 if (str.endsWith(DOUBLE_QUOTE)) { +617 str = str.substring(0, str.length() - 1); +618 } +619 +620 return str; +621 } +622 +623 private final RPattern lContext; +624 +625 private final String pattern; +626 +627 private final PhonemeExpr phoneme; +628 +629 private final RPattern rContext; +630 +631 /** +632 * Creates a new rule. +633 * +634 * @param pattern +635 * the pattern +636 * @param lContext +637 * the left context +638 * @param rContext +639 * the right context +640 * @param phoneme +641 * the resulting phoneme +642 */ +643 public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { +644 this.pattern = pattern; +645 this.lContext = pattern(lContext + "$"); +646 this.rContext = pattern("^" + rContext); +647 this.phoneme = phoneme; +648 } +649 +650 /** +651 * Gets the left context. 
This is a regular expression that must match to the left of the pattern. +652 * +653 * @return the left context Pattern +654 */ +655 public RPattern getLContext() { +656 return this.lContext; +657 } +658 +659 /** +660 * Gets the pattern. This is a string-literal that must exactly match. +661 * +662 * @return the pattern +663 */ +664 public String getPattern() { +665 return this.pattern; +666 } +667 +668 /** +669 * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. +670 * +671 * @return the phoneme +672 */ +673 public PhonemeExpr getPhoneme() { +674 return this.phoneme; +675 } +676 +677 /** +678 * Gets the right context. This is a regular expression that must match to the right of the pattern. +679 * +680 * @return the right context Pattern +681 */ +682 public RPattern getRContext() { +683 return this.rContext; +684 } +685 +686 /** +687 * Decides if the pattern and context match the input starting at a position. It is a match if the +688 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and +689 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. 
+690 * +691 * @param input +692 * the input String +693 * @param i +694 * the int position within the input +695 * @return true if the pattern and left/right context match, false otherwise +696 */ +697 public boolean patternAndContextMatches(final CharSequence input, final int i) { +698 if (i < 0) { +699 throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); +700 } +701 +702 final int patternLength = this.pattern.length(); +703 final int ipl = i + patternLength; +704 +705 if (ipl > input.length()) { +706 // not enough room for the pattern to match +707 return false; +708 } +709 +710 // evaluate the pattern, left context and right context +711 // fail early if any of the evaluations is not successful +712 if (!input.subSequence(i, ipl).equals(this.pattern)) { +713 return false; +714 } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { +715 return false; +716 } +717 return this.lContext.isMatch(input.subSequence(0, i)); +718 } +719} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.ArrayList; +022import java.util.Arrays; +023import java.util.Collections; +024import java.util.Comparator; +025import java.util.EnumMap; +026import java.util.HashMap; +027import java.util.HashSet; +028import java.util.List; +029import java.util.Map; +030import java.util.Scanner; +031import java.util.Set; +032import java.util.regex.Matcher; +033import java.util.regex.Pattern; +034 +035import org.apache.commons.codec.language.bm.Languages.LanguageSet; +036 +037/** +038 * A phoneme rule. +039 * <p> +040 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply +041 * and a logical flag indicating if all languages must be in play. 
A rule matches if: +042 * <ul> +043 * <li>the pattern matches at the current position</li> +044 * <li>the string up until the beginning of the pattern matches the left context</li> +045 * <li>the string from the end of the pattern matches the right context</li> +046 * <li>logical is ALL and all languages are in scope; or</li> +047 * <li>logical is any other value and at least one language is in scope</li> +048 * </ul> +049 * <p> +050 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user +051 * to explicitly construct their own. +052 * <p> +053 * Rules are immutable and thread-safe. +054 * <p> +055 * <b>Rules resources</b> +056 * <p> +057 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically +058 * named following the pattern: +059 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote> +060 * <p> +061 * The format of these resources is the following: +062 * <ul> +063 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these +064 * will be interpreted as: +065 * <ol> +066 * <li>pattern</li> +067 * <li>left context</li> +068 * <li>right context</li> +069 * <li>phoneme</li> +070 * </ol> +071 * </li> +072 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded +073 * as a comment.</li> +074 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
This will skip +075 * all content until a line ending in '*' and '/' is found.</li> +076 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +077 * </ul> +078 * +079 * @since 1.6 +080 * @version $Id$ +081 */ +082public class Rule { +083 +084 public static final class Phoneme implements PhonemeExpr { +085 public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { +086 @Override +087 public int compare(final Phoneme o1, final Phoneme o2) { +088 for (int i = 0; i < o1.phonemeText.length(); i++) { +089 if (i >= o2.phonemeText.length()) { +090 return +1; +091 } +092 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); +093 if (c != 0) { +094 return c; +095 } +096 } +097 +098 if (o1.phonemeText.length() < o2.phonemeText.length()) { +099 return -1; +100 } +101 +102 return 0; +103 } +104 }; +105 +106 private final StringBuilder phonemeText; +107 private final Languages.LanguageSet languages; +108 +109 public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { +110 this.phonemeText = new StringBuilder(phonemeText); +111 this.languages = languages; +112 } +113 +114 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { +115 this(phonemeLeft.phonemeText, phonemeLeft.languages); +116 this.phonemeText.append(phonemeRight.phonemeText); +117 } +118 +119 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { +120 this(phonemeLeft.phonemeText, languages); +121 this.phonemeText.append(phonemeRight.phonemeText); +122 } +123 +124 public Phoneme append(final CharSequence str) { +125 this.phonemeText.append(str); +126 return this; +127 } +128 +129 public Languages.LanguageSet getLanguages() { +130 return this.languages; +131 } +132 +133 @Override +134 public Iterable<Phoneme> getPhonemes() { +135 return Collections.singleton(this); +136 } +137 +138 public CharSequence getPhonemeText() { +139 return this.phonemeText; +140 } +141 +142 /** 
+143 * Deprecated since 1.9. +144 * +145 * @param right the Phoneme to join +146 * @return a new Phoneme +147 * @deprecated since 1.9 +148 */ +149 @Deprecated +150 public Phoneme join(final Phoneme right) { +151 return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), +152 this.languages.restrictTo(right.languages)); +153 } +154 +155 /** +156 * Returns a new Phoneme with the same text but a union of its +157 * current language set and the given one. +158 * +159 * @param lang the language set to merge +160 * @return a new Phoneme +161 */ +162 public Phoneme mergeWithLanguage(final LanguageSet lang) { +163 return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); +164 } +165 +166 @Override +167 public String toString() { +168 return phonemeText.toString() + "[" + languages + "]"; +169 } +170 } +171 +172 public interface PhonemeExpr { +173 Iterable<Phoneme> getPhonemes(); +174 } +175 +176 public static final class PhonemeList implements PhonemeExpr { +177 private final List<Phoneme> phonemes; +178 +179 public PhonemeList(final List<Phoneme> phonemes) { +180 this.phonemes = phonemes; +181 } +182 +183 @Override +184 public List<Phoneme> getPhonemes() { +185 return this.phonemes; +186 } +187 } +188 +189 /** +190 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. 
+191 */ +192 public interface RPattern { +193 boolean isMatch(CharSequence input); +194 } +195 +196 public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { +197 @Override +198 public boolean isMatch(final CharSequence input) { +199 return true; +200 } +201 }; +202 +203 public static final String ALL = "ALL"; +204 +205 private static final String DOUBLE_QUOTE = "\""; +206 +207 private static final String HASH_INCLUDE = "#include"; +208 +209 private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES = +210 new EnumMap<>(NameType.class); +211 +212 static { +213 for (final NameType s : NameType.values()) { +214 final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts = +215 new EnumMap<>(RuleType.class); +216 +217 for (final RuleType rt : RuleType.values()) { +218 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>(); +219 +220 final Languages ls = Languages.getInstance(s); +221 for (final String l : ls.getLanguages()) { +222 try (final Scanner scanner = createScanner(s, rt, l)) { +223 rs.put(l, parseRules(scanner, createResourceName(s, rt, l))); +224 } catch (final IllegalStateException e) { +225 throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); +226 } +227 } +228 if (!rt.equals(RuleType.RULES)) { +229 try (final Scanner scanner = createScanner(s, rt, "common")) { +230 rs.put("common", parseRules(scanner, createResourceName(s, rt, "common"))); +231 } +232 } +233 +234 rts.put(rt, Collections.unmodifiableMap(rs)); +235 } +236 +237 RULES.put(s, Collections.unmodifiableMap(rts)); +238 } +239 } +240 +241 private static boolean contains(final CharSequence chars, final char input) { +242 for (int i = 0; i < chars.length(); i++) { +243 if (chars.charAt(i) == input) { +244 return true; +245 } +246 } +247 return false; +248 } +249 +250 private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { +251 return 
String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", +252 nameType.getName(), rt.getName(), lang); +253 } +254 +255 private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { +256 final String resName = createResourceName(nameType, rt, lang); +257 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +258 +259 if (rulesIS == null) { +260 throw new IllegalArgumentException("Unable to load resource: " + resName); +261 } +262 +263 return new Scanner(rulesIS, ResourceConstants.ENCODING); +264 } +265 +266 private static Scanner createScanner(final String lang) { +267 final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); +268 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +269 +270 if (rulesIS == null) { +271 throw new IllegalArgumentException("Unable to load resource: " + resName); +272 } +273 +274 return new Scanner(rulesIS, ResourceConstants.ENCODING); +275 } +276 +277 private static boolean endsWith(final CharSequence input, final CharSequence suffix) { +278 if (suffix.length() > input.length()) { +279 return false; +280 } +281 for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { +282 if (input.charAt(i) != suffix.charAt(j)) { +283 return false; +284 } +285 } +286 return true; +287 } +288 +289 /** +290 * Gets rules for a combination of name type, rule type and languages. 
+291 * +292 * @param nameType +293 * the NameType to consider +294 * @param rt +295 * the RuleType to consider +296 * @param langs +297 * the set of languages to consider +298 * @return a list of Rules that apply +299 */ +300 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, +301 final Languages.LanguageSet langs) { +302 final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); +303 final List<Rule> allRules = new ArrayList<>(); +304 for (final List<Rule> rules : ruleMap.values()) { +305 allRules.addAll(rules); +306 } +307 return allRules; +308 } +309 +310 /** +311 * Gets rules for a combination of name type, rule type and a single language. +312 * +313 * @param nameType +314 * the NameType to consider +315 * @param rt +316 * the RuleType to consider +317 * @param lang +318 * the language to consider +319 * @return a list of Rules that apply +320 */ +321 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { +322 return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); +323 } +324 +325 /** +326 * Gets rules for a combination of name type, rule type and languages. +327 * +328 * @param nameType +329 * the NameType to consider +330 * @param rt +331 * the RuleType to consider +332 * @param langs +333 * the set of languages to consider +334 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +335 * @since 1.9 +336 */ +337 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +338 final Languages.LanguageSet langs) { +339 return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : +340 getInstanceMap(nameType, rt, Languages.ANY); +341 } +342 +343 /** +344 * Gets rules for a combination of name type, rule type and a single language. 
+345 * +346 * @param nameType +347 * the NameType to consider +348 * @param rt +349 * the RuleType to consider +350 * @param lang +351 * the language to consider +352 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +353 * @since 1.9 +354 */ +355 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +356 final String lang) { +357 final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); +358 +359 if (rules == null) { +360 throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", +361 nameType.getName(), rt.getName(), lang)); +362 } +363 +364 return rules; +365 } +366 +367 private static Phoneme parsePhoneme(final String ph) { +368 final int open = ph.indexOf("["); +369 if (open >= 0) { +370 if (!ph.endsWith("]")) { +371 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); +372 } +373 final String before = ph.substring(0, open); +374 final String in = ph.substring(open + 1, ph.length() - 1); +375 final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]"))); +376 +377 return new Phoneme(before, Languages.LanguageSet.from(langs)); +378 } +379 return new Phoneme(ph, Languages.ANY_LANGUAGE); +380 } +381 +382 private static PhonemeExpr parsePhonemeExpr(final String ph) { +383 if (ph.startsWith("(")) { // we have a bracketed list of options +384 if (!ph.endsWith(")")) { +385 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); +386 } +387 +388 final List<Phoneme> phs = new ArrayList<>(); +389 final String body = ph.substring(1, ph.length() - 1); +390 for (final String part : body.split("[|]")) { +391 phs.add(parsePhoneme(part)); +392 } +393 if (body.startsWith("|") || body.endsWith("|")) { +394 phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); +395 } +396 +397 return new PhonemeList(phs); +398 } +399 return parsePhoneme(ph); +400 } +401 +402 private 
static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { +403 final Map<String, List<Rule>> lines = new HashMap<>(); +404 int currentLine = 0; +405 +406 boolean inMultilineComment = false; +407 while (scanner.hasNextLine()) { +408 currentLine++; +409 final String rawLine = scanner.nextLine(); +410 String line = rawLine; +411 +412 if (inMultilineComment) { +413 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +414 inMultilineComment = false; +415 } +416 } else { +417 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +418 inMultilineComment = true; +419 } else { +420 // discard comments +421 final int cmtI = line.indexOf(ResourceConstants.CMT); +422 if (cmtI >= 0) { +423 line = line.substring(0, cmtI); +424 } +425 +426 // trim leading-trailing whitespace +427 line = line.trim(); +428 +429 if (line.length() == 0) { +430 continue; // empty lines can be safely skipped +431 } +432 +433 if (line.startsWith(HASH_INCLUDE)) { +434 // include statement +435 final String incl = line.substring(HASH_INCLUDE.length()).trim(); +436 if (incl.contains(" ")) { +437 throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + +438 location); +439 } +440 try (final Scanner hashIncludeScanner = createScanner(incl)) { +441 lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); +442 } +443 } else { +444 // rule +445 final String[] parts = line.split("\\s+"); +446 if (parts.length != 4) { +447 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +448 " parts: " + rawLine + " in " + location); +449 } +450 try { +451 final String pat = stripQuotes(parts[0]); +452 final String lCon = stripQuotes(parts[1]); +453 final String rCon = stripQuotes(parts[2]); +454 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); +455 final int cLine = currentLine; +456 final Rule r = new Rule(pat, lCon, rCon, ph) { +457 private final int myLine = cLine; +458 private final String 
loc = location; +459 +460 @Override +461 public String toString() { +462 final StringBuilder sb = new StringBuilder(); +463 sb.append("Rule"); +464 sb.append("{line=").append(myLine); +465 sb.append(", loc='").append(loc).append('\''); +466 sb.append(", pat='").append(pat).append('\''); +467 sb.append(", lcon='").append(lCon).append('\''); +468 sb.append(", rcon='").append(rCon).append('\''); +469 sb.append('}'); +470 return sb.toString(); +471 } +472 }; +473 final String patternKey = r.pattern.substring(0,1); +474 List<Rule> rules = lines.get(patternKey); +475 if (rules == null) { +476 rules = new ArrayList<>(); +477 lines.put(patternKey, rules); +478 } +479 rules.add(r); +480 } catch (final IllegalArgumentException e) { +481 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + +482 location, e); +483 } +484 } +485 } +486 } +487 } +488 +489 return lines; +490 } +491 +492 /** +493 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. +494 * +495 * @param regex +496 * the regular expression to compile +497 * @return an RPattern that will match this regex +498 */ +499 private static RPattern pattern(final String regex) { +500 final boolean startsWith = regex.startsWith("^"); +501 final boolean endsWith = regex.endsWith("$"); +502 final String content = regex.substring(startsWith ? 1 : 0, endsWith ? 
regex.length() - 1 : regex.length()); +503 final boolean boxes = content.contains("["); +504 +505 if (!boxes) { +506 if (startsWith && endsWith) { +507 // exact match +508 if (content.length() == 0) { +509 // empty +510 return new RPattern() { +511 @Override +512 public boolean isMatch(final CharSequence input) { +513 return input.length() == 0; +514 } +515 }; +516 } +517 return new RPattern() { +518 @Override +519 public boolean isMatch(final CharSequence input) { +520 return input.equals(content); +521 } +522 }; +523 } else if ((startsWith || endsWith) && content.length() == 0) { +524 // matches every string +525 return ALL_STRINGS_RMATCHER; +526 } else if (startsWith) { +527 // matches from start +528 return new RPattern() { +529 @Override +530 public boolean isMatch(final CharSequence input) { +531 return startsWith(input, content); +532 } +533 }; +534 } else if (endsWith) { +535 // matches from start +536 return new RPattern() { +537 @Override +538 public boolean isMatch(final CharSequence input) { +539 return endsWith(input, content); +540 } +541 }; +542 } +543 } else { +544 final boolean startsWithBox = content.startsWith("["); +545 final boolean endsWithBox = content.endsWith("]"); +546 +547 if (startsWithBox && endsWithBox) { +548 String boxContent = content.substring(1, content.length() - 1); +549 if (!boxContent.contains("[")) { +550 // box containing alternatives +551 final boolean negate = boxContent.startsWith("^"); +552 if (negate) { +553 boxContent = boxContent.substring(1); +554 } +555 final String bContent = boxContent; +556 final boolean shouldMatch = !negate; +557 +558 if (startsWith && endsWith) { +559 // exact match +560 return new RPattern() { +561 @Override +562 public boolean isMatch(final CharSequence input) { +563 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; +564 } +565 }; +566 } else if (startsWith) { +567 // first char +568 return new RPattern() { +569 @Override +570 public boolean isMatch(final 
CharSequence input) { +571 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; +572 } +573 }; +574 } else if (endsWith) { +575 // last char +576 return new RPattern() { +577 @Override +578 public boolean isMatch(final CharSequence input) { +579 return input.length() > 0 && +580 contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; +581 } +582 }; +583 } +584 } +585 } +586 } +587 +588 return new RPattern() { +589 Pattern pattern = Pattern.compile(regex); +590 +591 @Override +592 public boolean isMatch(final CharSequence input) { +593 final Matcher matcher = pattern.matcher(input); +594 return matcher.find(); +595 } +596 }; +597 } +598 +599 private static boolean startsWith(final CharSequence input, final CharSequence prefix) { +600 if (prefix.length() > input.length()) { +601 return false; +602 } +603 for (int i = 0; i < prefix.length(); i++) { +604 if (input.charAt(i) != prefix.charAt(i)) { +605 return false; +606 } +607 } +608 return true; +609 } +610 +611 private static String stripQuotes(String str) { +612 if (str.startsWith(DOUBLE_QUOTE)) { +613 str = str.substring(1); +614 } +615 +616 if (str.endsWith(DOUBLE_QUOTE)) { +617 str = str.substring(0, str.length() - 1); +618 } +619 +620 return str; +621 } +622 +623 private final RPattern lContext; +624 +625 private final String pattern; +626 +627 private final PhonemeExpr phoneme; +628 +629 private final RPattern rContext; +630 +631 /** +632 * Creates a new rule. +633 * +634 * @param pattern +635 * the pattern +636 * @param lContext +637 * the left context +638 * @param rContext +639 * the right context +640 * @param phoneme +641 * the resulting phoneme +642 */ +643 public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { +644 this.pattern = pattern; +645 this.lContext = pattern(lContext + "$"); +646 this.rContext = pattern("^" + rContext); +647 this.phoneme = phoneme; +648 } +649 +650 /** +651 * Gets the left context. 
This is a regular expression that must match to the left of the pattern. +652 * +653 * @return the left context Pattern +654 */ +655 public RPattern getLContext() { +656 return this.lContext; +657 } +658 +659 /** +660 * Gets the pattern. This is a string-literal that must exactly match. +661 * +662 * @return the pattern +663 */ +664 public String getPattern() { +665 return this.pattern; +666 } +667 +668 /** +669 * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. +670 * +671 * @return the phoneme +672 */ +673 public PhonemeExpr getPhoneme() { +674 return this.phoneme; +675 } +676 +677 /** +678 * Gets the right context. This is a regular expression that must match to the right of the pattern. +679 * +680 * @return the right context Pattern +681 */ +682 public RPattern getRContext() { +683 return this.rContext; +684 } +685 +686 /** +687 * Decides if the pattern and context match the input starting at a position. It is a match if the +688 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and +689 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. 
+690 * +691 * @param input +692 * the input String +693 * @param i +694 * the int position within the input +695 * @return true if the pattern and left/right context match, false otherwise +696 */ +697 public boolean patternAndContextMatches(final CharSequence input, final int i) { +698 if (i < 0) { +699 throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); +700 } +701 +702 final int patternLength = this.pattern.length(); +703 final int ipl = i + patternLength; +704 +705 if (ipl > input.length()) { +706 // not enough room for the pattern to match +707 return false; +708 } +709 +710 // evaluate the pattern, left context and right context +711 // fail early if any of the evaluations is not successful +712 if (!input.subSequence(i, ipl).equals(this.pattern)) { +713 return false; +714 } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { +715 return false; +716 } +717 return this.lContext.isMatch(input.subSequence(0, i)); +718 } +719} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.ArrayList; +022import java.util.Arrays; +023import java.util.Collections; +024import java.util.Comparator; +025import java.util.EnumMap; +026import java.util.HashMap; +027import java.util.HashSet; +028import java.util.List; +029import java.util.Map; +030import java.util.Scanner; +031import java.util.Set; +032import java.util.regex.Matcher; +033import java.util.regex.Pattern; +034 +035import org.apache.commons.codec.language.bm.Languages.LanguageSet; +036 +037/** +038 * A phoneme rule. +039 * <p> +040 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply +041 * and a logical flag indicating if all languages must be in play. 
A rule matches if: +042 * <ul> +043 * <li>the pattern matches at the current position</li> +044 * <li>the string up until the beginning of the pattern matches the left context</li> +045 * <li>the string from the end of the pattern matches the right context</li> +046 * <li>logical is ALL and all languages are in scope; or</li> +047 * <li>logical is any other value and at least one language is in scope</li> +048 * </ul> +049 * <p> +050 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user +051 * to explicitly construct their own. +052 * <p> +053 * Rules are immutable and thread-safe. +054 * <p> +055 * <b>Rules resources</b> +056 * <p> +057 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically +058 * named following the pattern: +059 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote> +060 * <p> +061 * The format of these resources is the following: +062 * <ul> +063 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these +064 * will be interpreted as: +065 * <ol> +066 * <li>pattern</li> +067 * <li>left context</li> +068 * <li>right context</li> +069 * <li>phoneme</li> +070 * </ol> +071 * </li> +072 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded +073 * as a comment.</li> +074 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
This will skip +075 * all content until a line ending in '*' and '/' is found.</li> +076 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +077 * </ul> +078 * +079 * @since 1.6 +080 * @version $Id$ +081 */ +082public class Rule { +083 +084 public static final class Phoneme implements PhonemeExpr { +085 public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { +086 @Override +087 public int compare(final Phoneme o1, final Phoneme o2) { +088 for (int i = 0; i < o1.phonemeText.length(); i++) { +089 if (i >= o2.phonemeText.length()) { +090 return +1; +091 } +092 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); +093 if (c != 0) { +094 return c; +095 } +096 } +097 +098 if (o1.phonemeText.length() < o2.phonemeText.length()) { +099 return -1; +100 } +101 +102 return 0; +103 } +104 }; +105 +106 private final StringBuilder phonemeText; +107 private final Languages.LanguageSet languages; +108 +109 public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { +110 this.phonemeText = new StringBuilder(phonemeText); +111 this.languages = languages; +112 } +113 +114 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { +115 this(phonemeLeft.phonemeText, phonemeLeft.languages); +116 this.phonemeText.append(phonemeRight.phonemeText); +117 } +118 +119 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { +120 this(phonemeLeft.phonemeText, languages); +121 this.phonemeText.append(phonemeRight.phonemeText); +122 } +123 +124 public Phoneme append(final CharSequence str) { +125 this.phonemeText.append(str); +126 return this; +127 } +128 +129 public Languages.LanguageSet getLanguages() { +130 return this.languages; +131 } +132 +133 @Override +134 public Iterable<Phoneme> getPhonemes() { +135 return Collections.singleton(this); +136 } +137 +138 public CharSequence getPhonemeText() { +139 return this.phonemeText; +140 } +141 +142 /** 
+143 * Deprecated since 1.9. +144 * +145 * @param right the Phoneme to join +146 * @return a new Phoneme +147 * @deprecated since 1.9 +148 */ +149 @Deprecated +150 public Phoneme join(final Phoneme right) { +151 return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), +152 this.languages.restrictTo(right.languages)); +153 } +154 +155 /** +156 * Returns a new Phoneme with the same text but a union of its +157 * current language set and the given one. +158 * +159 * @param lang the language set to merge +160 * @return a new Phoneme +161 */ +162 public Phoneme mergeWithLanguage(final LanguageSet lang) { +163 return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); +164 } +165 +166 @Override +167 public String toString() { +168 return phonemeText.toString() + "[" + languages + "]"; +169 } +170 } +171 +172 public interface PhonemeExpr { +173 Iterable<Phoneme> getPhonemes(); +174 } +175 +176 public static final class PhonemeList implements PhonemeExpr { +177 private final List<Phoneme> phonemes; +178 +179 public PhonemeList(final List<Phoneme> phonemes) { +180 this.phonemes = phonemes; +181 } +182 +183 @Override +184 public List<Phoneme> getPhonemes() { +185 return this.phonemes; +186 } +187 } +188 +189 /** +190 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. 
+191 */ +192 public interface RPattern { +193 boolean isMatch(CharSequence input); +194 } +195 +196 public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { +197 @Override +198 public boolean isMatch(final CharSequence input) { +199 return true; +200 } +201 }; +202 +203 public static final String ALL = "ALL"; +204 +205 private static final String DOUBLE_QUOTE = "\""; +206 +207 private static final String HASH_INCLUDE = "#include"; +208 +209 private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES = +210 new EnumMap<>(NameType.class); +211 +212 static { +213 for (final NameType s : NameType.values()) { +214 final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts = +215 new EnumMap<>(RuleType.class); +216 +217 for (final RuleType rt : RuleType.values()) { +218 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>(); +219 +220 final Languages ls = Languages.getInstance(s); +221 for (final String l : ls.getLanguages()) { +222 try (final Scanner scanner = createScanner(s, rt, l)) { +223 rs.put(l, parseRules(scanner, createResourceName(s, rt, l))); +224 } catch (final IllegalStateException e) { +225 throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); +226 } +227 } +228 if (!rt.equals(RuleType.RULES)) { +229 try (final Scanner scanner = createScanner(s, rt, "common")) { +230 rs.put("common", parseRules(scanner, createResourceName(s, rt, "common"))); +231 } +232 } +233 +234 rts.put(rt, Collections.unmodifiableMap(rs)); +235 } +236 +237 RULES.put(s, Collections.unmodifiableMap(rts)); +238 } +239 } +240 +241 private static boolean contains(final CharSequence chars, final char input) { +242 for (int i = 0; i < chars.length(); i++) { +243 if (chars.charAt(i) == input) { +244 return true; +245 } +246 } +247 return false; +248 } +249 +250 private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { +251 return 
String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", +252 nameType.getName(), rt.getName(), lang); +253 } +254 +255 private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { +256 final String resName = createResourceName(nameType, rt, lang); +257 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +258 +259 if (rulesIS == null) { +260 throw new IllegalArgumentException("Unable to load resource: " + resName); +261 } +262 +263 return new Scanner(rulesIS, ResourceConstants.ENCODING); +264 } +265 +266 private static Scanner createScanner(final String lang) { +267 final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); +268 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +269 +270 if (rulesIS == null) { +271 throw new IllegalArgumentException("Unable to load resource: " + resName); +272 } +273 +274 return new Scanner(rulesIS, ResourceConstants.ENCODING); +275 } +276 +277 private static boolean endsWith(final CharSequence input, final CharSequence suffix) { +278 if (suffix.length() > input.length()) { +279 return false; +280 } +281 for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { +282 if (input.charAt(i) != suffix.charAt(j)) { +283 return false; +284 } +285 } +286 return true; +287 } +288 +289 /** +290 * Gets rules for a combination of name type, rule type and languages. 
+291 * +292 * @param nameType +293 * the NameType to consider +294 * @param rt +295 * the RuleType to consider +296 * @param langs +297 * the set of languages to consider +298 * @return a list of Rules that apply +299 */ +300 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, +301 final Languages.LanguageSet langs) { +302 final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); +303 final List<Rule> allRules = new ArrayList<>(); +304 for (final List<Rule> rules : ruleMap.values()) { +305 allRules.addAll(rules); +306 } +307 return allRules; +308 } +309 +310 /** +311 * Gets rules for a combination of name type, rule type and a single language. +312 * +313 * @param nameType +314 * the NameType to consider +315 * @param rt +316 * the RuleType to consider +317 * @param lang +318 * the language to consider +319 * @return a list of Rules that apply +320 */ +321 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { +322 return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); +323 } +324 +325 /** +326 * Gets rules for a combination of name type, rule type and languages. +327 * +328 * @param nameType +329 * the NameType to consider +330 * @param rt +331 * the RuleType to consider +332 * @param langs +333 * the set of languages to consider +334 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +335 * @since 1.9 +336 */ +337 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +338 final Languages.LanguageSet langs) { +339 return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : +340 getInstanceMap(nameType, rt, Languages.ANY); +341 } +342 +343 /** +344 * Gets rules for a combination of name type, rule type and a single language. 
+345 * +346 * @param nameType +347 * the NameType to consider +348 * @param rt +349 * the RuleType to consider +350 * @param lang +351 * the language to consider +352 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +353 * @since 1.9 +354 */ +355 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +356 final String lang) { +357 final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); +358 +359 if (rules == null) { +360 throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", +361 nameType.getName(), rt.getName(), lang)); +362 } +363 +364 return rules; +365 } +366 +367 private static Phoneme parsePhoneme(final String ph) { +368 final int open = ph.indexOf("["); +369 if (open >= 0) { +370 if (!ph.endsWith("]")) { +371 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); +372 } +373 final String before = ph.substring(0, open); +374 final String in = ph.substring(open + 1, ph.length() - 1); +375 final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]"))); +376 +377 return new Phoneme(before, Languages.LanguageSet.from(langs)); +378 } +379 return new Phoneme(ph, Languages.ANY_LANGUAGE); +380 } +381 +382 private static PhonemeExpr parsePhonemeExpr(final String ph) { +383 if (ph.startsWith("(")) { // we have a bracketed list of options +384 if (!ph.endsWith(")")) { +385 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); +386 } +387 +388 final List<Phoneme> phs = new ArrayList<>(); +389 final String body = ph.substring(1, ph.length() - 1); +390 for (final String part : body.split("[|]")) { +391 phs.add(parsePhoneme(part)); +392 } +393 if (body.startsWith("|") || body.endsWith("|")) { +394 phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); +395 } +396 +397 return new PhonemeList(phs); +398 } +399 return parsePhoneme(ph); +400 } +401 +402 private 
static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { +403 final Map<String, List<Rule>> lines = new HashMap<>(); +404 int currentLine = 0; +405 +406 boolean inMultilineComment = false; +407 while (scanner.hasNextLine()) { +408 currentLine++; +409 final String rawLine = scanner.nextLine(); +410 String line = rawLine; +411 +412 if (inMultilineComment) { +413 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +414 inMultilineComment = false; +415 } +416 } else { +417 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +418 inMultilineComment = true; +419 } else { +420 // discard comments +421 final int cmtI = line.indexOf(ResourceConstants.CMT); +422 if (cmtI >= 0) { +423 line = line.substring(0, cmtI); +424 } +425 +426 // trim leading-trailing whitespace +427 line = line.trim(); +428 +429 if (line.length() == 0) { +430 continue; // empty lines can be safely skipped +431 } +432 +433 if (line.startsWith(HASH_INCLUDE)) { +434 // include statement +435 final String incl = line.substring(HASH_INCLUDE.length()).trim(); +436 if (incl.contains(" ")) { +437 throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + +438 location); +439 } +440 try (final Scanner hashIncludeScanner = createScanner(incl)) { +441 lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); +442 } +443 } else { +444 // rule +445 final String[] parts = line.split("\\s+"); +446 if (parts.length != 4) { +447 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +448 " parts: " + rawLine + " in " + location); +449 } +450 try { +451 final String pat = stripQuotes(parts[0]); +452 final String lCon = stripQuotes(parts[1]); +453 final String rCon = stripQuotes(parts[2]); +454 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); +455 final int cLine = currentLine; +456 final Rule r = new Rule(pat, lCon, rCon, ph) { +457 private final int myLine = cLine; +458 private final String 
loc = location; +459 +460 @Override +461 public String toString() { +462 final StringBuilder sb = new StringBuilder(); +463 sb.append("Rule"); +464 sb.append("{line=").append(myLine); +465 sb.append(", loc='").append(loc).append('\''); +466 sb.append(", pat='").append(pat).append('\''); +467 sb.append(", lcon='").append(lCon).append('\''); +468 sb.append(", rcon='").append(rCon).append('\''); +469 sb.append('}'); +470 return sb.toString(); +471 } +472 }; +473 final String patternKey = r.pattern.substring(0,1); +474 List<Rule> rules = lines.get(patternKey); +475 if (rules == null) { +476 rules = new ArrayList<>(); +477 lines.put(patternKey, rules); +478 } +479 rules.add(r); +480 } catch (final IllegalArgumentException e) { +481 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + +482 location, e); +483 } +484 } +485 } +486 } +487 } +488 +489 return lines; +490 } +491 +492 /** +493 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. +494 * +495 * @param regex +496 * the regular expression to compile +497 * @return an RPattern that will match this regex +498 */ +499 private static RPattern pattern(final String regex) { +500 final boolean startsWith = regex.startsWith("^"); +501 final boolean endsWith = regex.endsWith("$"); +502 final String content = regex.substring(startsWith ? 1 : 0, endsWith ? 
regex.length() - 1 : regex.length()); +503 final boolean boxes = content.contains("["); +504 +505 if (!boxes) { +506 if (startsWith && endsWith) { +507 // exact match +508 if (content.length() == 0) { +509 // empty +510 return new RPattern() { +511 @Override +512 public boolean isMatch(final CharSequence input) { +513 return input.length() == 0; +514 } +515 }; +516 } +517 return new RPattern() { +518 @Override +519 public boolean isMatch(final CharSequence input) { +520 return input.equals(content); +521 } +522 }; +523 } else if ((startsWith || endsWith) && content.length() == 0) { +524 // matches every string +525 return ALL_STRINGS_RMATCHER; +526 } else if (startsWith) { +527 // matches from start +528 return new RPattern() { +529 @Override +530 public boolean isMatch(final CharSequence input) { +531 return startsWith(input, content); +532 } +533 }; +534 } else if (endsWith) { +535 // matches from start +536 return new RPattern() { +537 @Override +538 public boolean isMatch(final CharSequence input) { +539 return endsWith(input, content); +540 } +541 }; +542 } +543 } else { +544 final boolean startsWithBox = content.startsWith("["); +545 final boolean endsWithBox = content.endsWith("]"); +546 +547 if (startsWithBox && endsWithBox) { +548 String boxContent = content.substring(1, content.length() - 1); +549 if (!boxContent.contains("[")) { +550 // box containing alternatives +551 final boolean negate = boxContent.startsWith("^"); +552 if (negate) { +553 boxContent = boxContent.substring(1); +554 } +555 final String bContent = boxContent; +556 final boolean shouldMatch = !negate; +557 +558 if (startsWith && endsWith) { +559 // exact match +560 return new RPattern() { +561 @Override +562 public boolean isMatch(final CharSequence input) { +563 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; +564 } +565 }; +566 } else if (startsWith) { +567 // first char +568 return new RPattern() { +569 @Override +570 public boolean isMatch(final 
CharSequence input) { +571 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; +572 } +573 }; +574 } else if (endsWith) { +575 // last char +576 return new RPattern() { +577 @Override +578 public boolean isMatch(final CharSequence input) { +579 return input.length() > 0 && +580 contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; +581 } +582 }; +583 } +584 } +585 } +586 } +587 +588 return new RPattern() { +589 Pattern pattern = Pattern.compile(regex); +590 +591 @Override +592 public boolean isMatch(final CharSequence input) { +593 final Matcher matcher = pattern.matcher(input); +594 return matcher.find(); +595 } +596 }; +597 } +598 +599 private static boolean startsWith(final CharSequence input, final CharSequence prefix) { +600 if (prefix.length() > input.length()) { +601 return false; +602 } +603 for (int i = 0; i < prefix.length(); i++) { +604 if (input.charAt(i) != prefix.charAt(i)) { +605 return false; +606 } +607 } +608 return true; +609 } +610 +611 private static String stripQuotes(String str) { +612 if (str.startsWith(DOUBLE_QUOTE)) { +613 str = str.substring(1); +614 } +615 +616 if (str.endsWith(DOUBLE_QUOTE)) { +617 str = str.substring(0, str.length() - 1); +618 } +619 +620 return str; +621 } +622 +623 private final RPattern lContext; +624 +625 private final String pattern; +626 +627 private final PhonemeExpr phoneme; +628 +629 private final RPattern rContext; +630 +631 /** +632 * Creates a new rule. +633 * +634 * @param pattern +635 * the pattern +636 * @param lContext +637 * the left context +638 * @param rContext +639 * the right context +640 * @param phoneme +641 * the resulting phoneme +642 */ +643 public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { +644 this.pattern = pattern; +645 this.lContext = pattern(lContext + "$"); +646 this.rContext = pattern("^" + rContext); +647 this.phoneme = phoneme; +648 } +649 +650 /** +651 * Gets the left context. 
This is a regular expression that must match to the left of the pattern. +652 * +653 * @return the left context Pattern +654 */ +655 public RPattern getLContext() { +656 return this.lContext; +657 } +658 +659 /** +660 * Gets the pattern. This is a string-literal that must exactly match. +661 * +662 * @return the pattern +663 */ +664 public String getPattern() { +665 return this.pattern; +666 } +667 +668 /** +669 * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. +670 * +671 * @return the phoneme +672 */ +673 public PhonemeExpr getPhoneme() { +674 return this.phoneme; +675 } +676 +677 /** +678 * Gets the right context. This is a regular expression that must match to the right of the pattern. +679 * +680 * @return the right context Pattern +681 */ +682 public RPattern getRContext() { +683 return this.rContext; +684 } +685 +686 /** +687 * Decides if the pattern and context match the input starting at a position. It is a match if the +688 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and +689 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. 
+690 * +691 * @param input +692 * the input String +693 * @param i +694 * the int position within the input +695 * @return true if the pattern and left/right context match, false otherwise +696 */ +697 public boolean patternAndContextMatches(final CharSequence input, final int i) { +698 if (i < 0) { +699 throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); +700 } +701 +702 final int patternLength = this.pattern.length(); +703 final int ipl = i + patternLength; +704 +705 if (ipl > input.length()) { +706 // not enough room for the pattern to match +707 return false; +708 } +709 +710 // evaluate the pattern, left context and right context +711 // fail early if any of the evaluations is not successful +712 if (!input.subSequence(i, ipl).equals(this.pattern)) { +713 return false; +714 } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { +715 return false; +716 } +717 return this.lContext.isMatch(input.subSequence(0, i)); +718 } +719} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020import java.io.InputStream; +021import java.util.ArrayList; +022import java.util.Arrays; +023import java.util.Collections; +024import java.util.Comparator; +025import java.util.EnumMap; +026import java.util.HashMap; +027import java.util.HashSet; +028import java.util.List; +029import java.util.Map; +030import java.util.Scanner; +031import java.util.Set; +032import java.util.regex.Matcher; +033import java.util.regex.Pattern; +034 +035import org.apache.commons.codec.language.bm.Languages.LanguageSet; +036 +037/** +038 * A phoneme rule. +039 * <p> +040 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply +041 * and a logical flag indicating if all languages must be in play. 
A rule matches if: +042 * <ul> +043 * <li>the pattern matches at the current position</li> +044 * <li>the string up until the beginning of the pattern matches the left context</li> +045 * <li>the string from the end of the pattern matches the right context</li> +046 * <li>logical is ALL and all languages are in scope; or</li> +047 * <li>logical is any other value and at least one language is in scope</li> +048 * </ul> +049 * <p> +050 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user +051 * to explicitly construct their own. +052 * <p> +053 * Rules are immutable and thread-safe. +054 * <p> +055 * <b>Rules resources</b> +056 * <p> +057 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically +058 * named following the pattern: +059 * <blockquote>org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt</blockquote> +060 * <p> +061 * The format of these resources is the following: +062 * <ul> +063 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these +064 * will be interpreted as: +065 * <ol> +066 * <li>pattern</li> +067 * <li>left context</li> +068 * <li>right context</li> +069 * <li>phoneme</li> +070 * </ol> +071 * </li> +072 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded +073 * as a comment.</li> +074 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. 
This will skip +075 * all content until a line ending in '*' and '/' is found.</li> +076 * <li><b>Blank lines:</b> All blank lines will be skipped.</li> +077 * </ul> +078 * +079 * @since 1.6 +080 * @version $Id$ +081 */ +082public class Rule { +083 +084 public static final class Phoneme implements PhonemeExpr { +085 public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() { +086 @Override +087 public int compare(final Phoneme o1, final Phoneme o2) { +088 for (int i = 0; i < o1.phonemeText.length(); i++) { +089 if (i >= o2.phonemeText.length()) { +090 return +1; +091 } +092 final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); +093 if (c != 0) { +094 return c; +095 } +096 } +097 +098 if (o1.phonemeText.length() < o2.phonemeText.length()) { +099 return -1; +100 } +101 +102 return 0; +103 } +104 }; +105 +106 private final StringBuilder phonemeText; +107 private final Languages.LanguageSet languages; +108 +109 public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { +110 this.phonemeText = new StringBuilder(phonemeText); +111 this.languages = languages; +112 } +113 +114 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { +115 this(phonemeLeft.phonemeText, phonemeLeft.languages); +116 this.phonemeText.append(phonemeRight.phonemeText); +117 } +118 +119 public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { +120 this(phonemeLeft.phonemeText, languages); +121 this.phonemeText.append(phonemeRight.phonemeText); +122 } +123 +124 public Phoneme append(final CharSequence str) { +125 this.phonemeText.append(str); +126 return this; +127 } +128 +129 public Languages.LanguageSet getLanguages() { +130 return this.languages; +131 } +132 +133 @Override +134 public Iterable<Phoneme> getPhonemes() { +135 return Collections.singleton(this); +136 } +137 +138 public CharSequence getPhonemeText() { +139 return this.phonemeText; +140 } +141 +142 /** 
+143 * Deprecated since 1.9. +144 * +145 * @param right the Phoneme to join +146 * @return a new Phoneme +147 * @deprecated since 1.9 +148 */ +149 @Deprecated +150 public Phoneme join(final Phoneme right) { +151 return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), +152 this.languages.restrictTo(right.languages)); +153 } +154 +155 /** +156 * Returns a new Phoneme with the same text but a union of its +157 * current language set and the given one. +158 * +159 * @param lang the language set to merge +160 * @return a new Phoneme +161 */ +162 public Phoneme mergeWithLanguage(final LanguageSet lang) { +163 return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); +164 } +165 +166 @Override +167 public String toString() { +168 return phonemeText.toString() + "[" + languages + "]"; +169 } +170 } +171 +172 public interface PhonemeExpr { +173 Iterable<Phoneme> getPhonemes(); +174 } +175 +176 public static final class PhonemeList implements PhonemeExpr { +177 private final List<Phoneme> phonemes; +178 +179 public PhonemeList(final List<Phoneme> phonemes) { +180 this.phonemes = phonemes; +181 } +182 +183 @Override +184 public List<Phoneme> getPhonemes() { +185 return this.phonemes; +186 } +187 } +188 +189 /** +190 * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. 
+191 */ +192 public interface RPattern { +193 boolean isMatch(CharSequence input); +194 } +195 +196 public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { +197 @Override +198 public boolean isMatch(final CharSequence input) { +199 return true; +200 } +201 }; +202 +203 public static final String ALL = "ALL"; +204 +205 private static final String DOUBLE_QUOTE = "\""; +206 +207 private static final String HASH_INCLUDE = "#include"; +208 +209 private static final Map<NameType, Map<RuleType, Map<String, Map<String, List<Rule>>>>> RULES = +210 new EnumMap<>(NameType.class); +211 +212 static { +213 for (final NameType s : NameType.values()) { +214 final Map<RuleType, Map<String, Map<String, List<Rule>>>> rts = +215 new EnumMap<>(RuleType.class); +216 +217 for (final RuleType rt : RuleType.values()) { +218 final Map<String, Map<String, List<Rule>>> rs = new HashMap<>(); +219 +220 final Languages ls = Languages.getInstance(s); +221 for (final String l : ls.getLanguages()) { +222 try (final Scanner scanner = createScanner(s, rt, l)) { +223 rs.put(l, parseRules(scanner, createResourceName(s, rt, l))); +224 } catch (final IllegalStateException e) { +225 throw new IllegalStateException("Problem processing " + createResourceName(s, rt, l), e); +226 } +227 } +228 if (!rt.equals(RuleType.RULES)) { +229 try (final Scanner scanner = createScanner(s, rt, "common")) { +230 rs.put("common", parseRules(scanner, createResourceName(s, rt, "common"))); +231 } +232 } +233 +234 rts.put(rt, Collections.unmodifiableMap(rs)); +235 } +236 +237 RULES.put(s, Collections.unmodifiableMap(rts)); +238 } +239 } +240 +241 private static boolean contains(final CharSequence chars, final char input) { +242 for (int i = 0; i < chars.length(); i++) { +243 if (chars.charAt(i) == input) { +244 return true; +245 } +246 } +247 return false; +248 } +249 +250 private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { +251 return 
String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", +252 nameType.getName(), rt.getName(), lang); +253 } +254 +255 private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) { +256 final String resName = createResourceName(nameType, rt, lang); +257 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +258 +259 if (rulesIS == null) { +260 throw new IllegalArgumentException("Unable to load resource: " + resName); +261 } +262 +263 return new Scanner(rulesIS, ResourceConstants.ENCODING); +264 } +265 +266 private static Scanner createScanner(final String lang) { +267 final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); +268 final InputStream rulesIS = Languages.class.getClassLoader().getResourceAsStream(resName); +269 +270 if (rulesIS == null) { +271 throw new IllegalArgumentException("Unable to load resource: " + resName); +272 } +273 +274 return new Scanner(rulesIS, ResourceConstants.ENCODING); +275 } +276 +277 private static boolean endsWith(final CharSequence input, final CharSequence suffix) { +278 if (suffix.length() > input.length()) { +279 return false; +280 } +281 for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { +282 if (input.charAt(i) != suffix.charAt(j)) { +283 return false; +284 } +285 } +286 return true; +287 } +288 +289 /** +290 * Gets rules for a combination of name type, rule type and languages. 
+291 * +292 * @param nameType +293 * the NameType to consider +294 * @param rt +295 * the RuleType to consider +296 * @param langs +297 * the set of languages to consider +298 * @return a list of Rules that apply +299 */ +300 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, +301 final Languages.LanguageSet langs) { +302 final Map<String, List<Rule>> ruleMap = getInstanceMap(nameType, rt, langs); +303 final List<Rule> allRules = new ArrayList<>(); +304 for (final List<Rule> rules : ruleMap.values()) { +305 allRules.addAll(rules); +306 } +307 return allRules; +308 } +309 +310 /** +311 * Gets rules for a combination of name type, rule type and a single language. +312 * +313 * @param nameType +314 * the NameType to consider +315 * @param rt +316 * the RuleType to consider +317 * @param lang +318 * the language to consider +319 * @return a list of Rules that apply +320 */ +321 public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) { +322 return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); +323 } +324 +325 /** +326 * Gets rules for a combination of name type, rule type and languages. +327 * +328 * @param nameType +329 * the NameType to consider +330 * @param rt +331 * the RuleType to consider +332 * @param langs +333 * the set of languages to consider +334 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +335 * @since 1.9 +336 */ +337 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +338 final Languages.LanguageSet langs) { +339 return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : +340 getInstanceMap(nameType, rt, Languages.ANY); +341 } +342 +343 /** +344 * Gets rules for a combination of name type, rule type and a single language. 
+345 * +346 * @param nameType +347 * the NameType to consider +348 * @param rt +349 * the RuleType to consider +350 * @param lang +351 * the language to consider +352 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern +353 * @since 1.9 +354 */ +355 public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt, +356 final String lang) { +357 final Map<String, List<Rule>> rules = RULES.get(nameType).get(rt).get(lang); +358 +359 if (rules == null) { +360 throw new IllegalArgumentException(String.format("No rules found for %s, %s, %s.", +361 nameType.getName(), rt.getName(), lang)); +362 } +363 +364 return rules; +365 } +366 +367 private static Phoneme parsePhoneme(final String ph) { +368 final int open = ph.indexOf("["); +369 if (open >= 0) { +370 if (!ph.endsWith("]")) { +371 throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); +372 } +373 final String before = ph.substring(0, open); +374 final String in = ph.substring(open + 1, ph.length() - 1); +375 final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]"))); +376 +377 return new Phoneme(before, Languages.LanguageSet.from(langs)); +378 } +379 return new Phoneme(ph, Languages.ANY_LANGUAGE); +380 } +381 +382 private static PhonemeExpr parsePhonemeExpr(final String ph) { +383 if (ph.startsWith("(")) { // we have a bracketed list of options +384 if (!ph.endsWith(")")) { +385 throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); +386 } +387 +388 final List<Phoneme> phs = new ArrayList<>(); +389 final String body = ph.substring(1, ph.length() - 1); +390 for (final String part : body.split("[|]")) { +391 phs.add(parsePhoneme(part)); +392 } +393 if (body.startsWith("|") || body.endsWith("|")) { +394 phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); +395 } +396 +397 return new PhonemeList(phs); +398 } +399 return parsePhoneme(ph); +400 } +401 +402 private 
static Map<String, List<Rule>> parseRules(final Scanner scanner, final String location) { +403 final Map<String, List<Rule>> lines = new HashMap<>(); +404 int currentLine = 0; +405 +406 boolean inMultilineComment = false; +407 while (scanner.hasNextLine()) { +408 currentLine++; +409 final String rawLine = scanner.nextLine(); +410 String line = rawLine; +411 +412 if (inMultilineComment) { +413 if (line.endsWith(ResourceConstants.EXT_CMT_END)) { +414 inMultilineComment = false; +415 } +416 } else { +417 if (line.startsWith(ResourceConstants.EXT_CMT_START)) { +418 inMultilineComment = true; +419 } else { +420 // discard comments +421 final int cmtI = line.indexOf(ResourceConstants.CMT); +422 if (cmtI >= 0) { +423 line = line.substring(0, cmtI); +424 } +425 +426 // trim leading-trailing whitespace +427 line = line.trim(); +428 +429 if (line.length() == 0) { +430 continue; // empty lines can be safely skipped +431 } +432 +433 if (line.startsWith(HASH_INCLUDE)) { +434 // include statement +435 final String incl = line.substring(HASH_INCLUDE.length()).trim(); +436 if (incl.contains(" ")) { +437 throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + +438 location); +439 } +440 try (final Scanner hashIncludeScanner = createScanner(incl)) { +441 lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); +442 } +443 } else { +444 // rule +445 final String[] parts = line.split("\\s+"); +446 if (parts.length != 4) { +447 throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + +448 " parts: " + rawLine + " in " + location); +449 } +450 try { +451 final String pat = stripQuotes(parts[0]); +452 final String lCon = stripQuotes(parts[1]); +453 final String rCon = stripQuotes(parts[2]); +454 final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); +455 final int cLine = currentLine; +456 final Rule r = new Rule(pat, lCon, rCon, ph) { +457 private final int myLine = cLine; +458 private final String 
loc = location; +459 +460 @Override +461 public String toString() { +462 final StringBuilder sb = new StringBuilder(); +463 sb.append("Rule"); +464 sb.append("{line=").append(myLine); +465 sb.append(", loc='").append(loc).append('\''); +466 sb.append(", pat='").append(pat).append('\''); +467 sb.append(", lcon='").append(lCon).append('\''); +468 sb.append(", rcon='").append(rCon).append('\''); +469 sb.append('}'); +470 return sb.toString(); +471 } +472 }; +473 final String patternKey = r.pattern.substring(0,1); +474 List<Rule> rules = lines.get(patternKey); +475 if (rules == null) { +476 rules = new ArrayList<>(); +477 lines.put(patternKey, rules); +478 } +479 rules.add(r); +480 } catch (final IllegalArgumentException e) { +481 throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " + +482 location, e); +483 } +484 } +485 } +486 } +487 } +488 +489 return lines; +490 } +491 +492 /** +493 * Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case. +494 * +495 * @param regex +496 * the regular expression to compile +497 * @return an RPattern that will match this regex +498 */ +499 private static RPattern pattern(final String regex) { +500 final boolean startsWith = regex.startsWith("^"); +501 final boolean endsWith = regex.endsWith("$"); +502 final String content = regex.substring(startsWith ? 1 : 0, endsWith ? 
regex.length() - 1 : regex.length()); +503 final boolean boxes = content.contains("["); +504 +505 if (!boxes) { +506 if (startsWith && endsWith) { +507 // exact match +508 if (content.length() == 0) { +509 // empty +510 return new RPattern() { +511 @Override +512 public boolean isMatch(final CharSequence input) { +513 return input.length() == 0; +514 } +515 }; +516 } +517 return new RPattern() { +518 @Override +519 public boolean isMatch(final CharSequence input) { +520 return input.equals(content); +521 } +522 }; +523 } else if ((startsWith || endsWith) && content.length() == 0) { +524 // matches every string +525 return ALL_STRINGS_RMATCHER; +526 } else if (startsWith) { +527 // matches from start +528 return new RPattern() { +529 @Override +530 public boolean isMatch(final CharSequence input) { +531 return startsWith(input, content); +532 } +533 }; +534 } else if (endsWith) { +535 // matches from start +536 return new RPattern() { +537 @Override +538 public boolean isMatch(final CharSequence input) { +539 return endsWith(input, content); +540 } +541 }; +542 } +543 } else { +544 final boolean startsWithBox = content.startsWith("["); +545 final boolean endsWithBox = content.endsWith("]"); +546 +547 if (startsWithBox && endsWithBox) { +548 String boxContent = content.substring(1, content.length() - 1); +549 if (!boxContent.contains("[")) { +550 // box containing alternatives +551 final boolean negate = boxContent.startsWith("^"); +552 if (negate) { +553 boxContent = boxContent.substring(1); +554 } +555 final String bContent = boxContent; +556 final boolean shouldMatch = !negate; +557 +558 if (startsWith && endsWith) { +559 // exact match +560 return new RPattern() { +561 @Override +562 public boolean isMatch(final CharSequence input) { +563 return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch; +564 } +565 }; +566 } else if (startsWith) { +567 // first char +568 return new RPattern() { +569 @Override +570 public boolean isMatch(final 
CharSequence input) { +571 return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch; +572 } +573 }; +574 } else if (endsWith) { +575 // last char +576 return new RPattern() { +577 @Override +578 public boolean isMatch(final CharSequence input) { +579 return input.length() > 0 && +580 contains(bContent, input.charAt(input.length() - 1)) == shouldMatch; +581 } +582 }; +583 } +584 } +585 } +586 } +587 +588 return new RPattern() { +589 Pattern pattern = Pattern.compile(regex); +590 +591 @Override +592 public boolean isMatch(final CharSequence input) { +593 final Matcher matcher = pattern.matcher(input); +594 return matcher.find(); +595 } +596 }; +597 } +598 +599 private static boolean startsWith(final CharSequence input, final CharSequence prefix) { +600 if (prefix.length() > input.length()) { +601 return false; +602 } +603 for (int i = 0; i < prefix.length(); i++) { +604 if (input.charAt(i) != prefix.charAt(i)) { +605 return false; +606 } +607 } +608 return true; +609 } +610 +611 private static String stripQuotes(String str) { +612 if (str.startsWith(DOUBLE_QUOTE)) { +613 str = str.substring(1); +614 } +615 +616 if (str.endsWith(DOUBLE_QUOTE)) { +617 str = str.substring(0, str.length() - 1); +618 } +619 +620 return str; +621 } +622 +623 private final RPattern lContext; +624 +625 private final String pattern; +626 +627 private final PhonemeExpr phoneme; +628 +629 private final RPattern rContext; +630 +631 /** +632 * Creates a new rule. +633 * +634 * @param pattern +635 * the pattern +636 * @param lContext +637 * the left context +638 * @param rContext +639 * the right context +640 * @param phoneme +641 * the resulting phoneme +642 */ +643 public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) { +644 this.pattern = pattern; +645 this.lContext = pattern(lContext + "$"); +646 this.rContext = pattern("^" + rContext); +647 this.phoneme = phoneme; +648 } +649 +650 /** +651 * Gets the left context. 
This is a regular expression that must match to the left of the pattern. +652 * +653 * @return the left context Pattern +654 */ +655 public RPattern getLContext() { +656 return this.lContext; +657 } +658 +659 /** +660 * Gets the pattern. This is a string-literal that must exactly match. +661 * +662 * @return the pattern +663 */ +664 public String getPattern() { +665 return this.pattern; +666 } +667 +668 /** +669 * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match. +670 * +671 * @return the phoneme +672 */ +673 public PhonemeExpr getPhoneme() { +674 return this.phoneme; +675 } +676 +677 /** +678 * Gets the right context. This is a regular expression that must match to the right of the pattern. +679 * +680 * @return the right context Pattern +681 */ +682 public RPattern getRContext() { +683 return this.rContext; +684 } +685 +686 /** +687 * Decides if the pattern and context match the input starting at a position. It is a match if the +688 * <code>lContext</code> matches <code>input</code> up to <code>i</code>, <code>pattern</code> matches at i and +689 * <code>rContext</code> matches from the end of the match of <code>pattern</code> to the end of <code>input</code>. 
+690 * +691 * @param input +692 * the input String +693 * @param i +694 * the int position within the input +695 * @return true if the pattern and left/right context match, false otherwise +696 */ +697 public boolean patternAndContextMatches(final CharSequence input, final int i) { +698 if (i < 0) { +699 throw new IndexOutOfBoundsException("Can not match pattern at negative indexes"); +700 } +701 +702 final int patternLength = this.pattern.length(); +703 final int ipl = i + patternLength; +704 +705 if (ipl > input.length()) { +706 // not enough room for the pattern to match +707 return false; +708 } +709 +710 // evaluate the pattern, left context and right context +711 // fail early if any of the evaluations is not successful +712 if (!input.subSequence(i, ipl).equals(this.pattern)) { +713 return false; +714 } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) { +715 return false; +716 } +717 return this.lContext.isMatch(input.subSequence(0, i)); +718 } +719} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.language.bm; +019 +020/** +021 * Types of rule. +022 * +023 * @since 1.6 +024 * @version $Id$ +025 */ +026public enum RuleType { +027 +028 /** Approximate rules, which will lead to the largest number of phonetic interpretations. */ +029 APPROX("approx"), +030 /** Exact rules, which will lead to a minimum number of phonetic interpretations. */ +031 EXACT("exact"), +032 /** For internal use only. Please use {@link #APPROX} or {@link #EXACT}. */ +033 RULES("rules"); +034 +035 private final String name; +036 +037 RuleType(final String name) { +038 this.name = name; +039 } +040 +041 /** +042 * Gets the rule name. +043 * +044 * @return the rule name. +045 */ +046 public String getName() { +047 return this.name; +048 } +049 +050} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.net; +019 +020import java.io.UnsupportedEncodingException; +021import java.nio.charset.Charset; +022 +023import org.apache.commons.codec.Charsets; +024import org.apache.commons.codec.DecoderException; +025import org.apache.commons.codec.EncoderException; +026import org.apache.commons.codec.StringDecoder; +027import org.apache.commons.codec.StringEncoder; +028import org.apache.commons.codec.binary.Base64; +029 +030/** +031 * Identical to the Base64 encoding defined by <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> +032 * and allows a character set to be specified. +033 * <p> +034 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII +035 * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message +036 * handling software. +037 * <p> +038 * This class is immutable and thread-safe. 
+039 * +040 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message +041 * Header Extensions for Non-ASCII Text</a> +042 * +043 * @since 1.3 +044 * @version $Id$ +045 */ +046public class BCodec extends RFC1522Codec implements StringEncoder, StringDecoder { +047 /** +048 * The default Charset used for string decoding and encoding. +049 */ +050 private final Charset charset; +051 +052 /** +053 * Default constructor. +054 */ +055 public BCodec() { +056 this(Charsets.UTF_8); +057 } +058 +059 /** +060 * Constructor which allows for the selection of a default Charset +061 * +062 * @param charset +063 * the default string Charset to use. +064 * +065 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +066 * @since 1.7 +067 */ +068 public BCodec(final Charset charset) { +069 this.charset = charset; +070 } +071 +072 /** +073 * Constructor which allows for the selection of a default Charset +074 * +075 * @param charsetName +076 * the default Charset to use. 
+077 * @throws java.nio.charset.UnsupportedCharsetException +078 * If the named Charset is unavailable +079 * @since 1.7 throws UnsupportedCharsetException if the named Charset is unavailable +080 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +081 */ +082 public BCodec(final String charsetName) { +083 this(Charset.forName(charsetName)); +084 } +085 +086 @Override +087 protected String getEncoding() { +088 return "B"; +089 } +090 +091 @Override +092 protected byte[] doEncoding(final byte[] bytes) { +093 if (bytes == null) { +094 return null; +095 } +096 return Base64.encodeBase64(bytes); +097 } +098 +099 @Override +100 protected byte[] doDecoding(final byte[] bytes) { +101 if (bytes == null) { +102 return null; +103 } +104 return Base64.decodeBase64(bytes); +105 } +106 +107 /** +108 * Encodes a string into its Base64 form using the specified Charset. Unsafe characters are escaped. +109 * +110 * @param strSource +111 * string to convert to Base64 form +112 * @param sourceCharset +113 * the Charset for <code>value</code> +114 * @return Base64 string +115 * @throws EncoderException +116 * thrown if a failure condition is encountered during the encoding process. +117 * @since 1.7 +118 */ +119 public String encode(final String strSource, final Charset sourceCharset) throws EncoderException { +120 if (strSource == null) { +121 return null; +122 } +123 return encodeText(strSource, sourceCharset); +124 } +125 +126 /** +127 * Encodes a string into its Base64 form using the specified Charset. Unsafe characters are escaped. +128 * +129 * @param strSource +130 * string to convert to Base64 form +131 * @param sourceCharset +132 * the Charset for <code>value</code> +133 * @return Base64 string +134 * @throws EncoderException +135 * thrown if a failure condition is encountered during the encoding process. 
+136 */ +137 public String encode(final String strSource, final String sourceCharset) throws EncoderException { +138 if (strSource == null) { +139 return null; +140 } +141 try { +142 return this.encodeText(strSource, sourceCharset); +143 } catch (final UnsupportedEncodingException e) { +144 throw new EncoderException(e.getMessage(), e); +145 } +146 } +147 +148 /** +149 * Encodes a string into its Base64 form using the default Charset. Unsafe characters are escaped. +150 * +151 * @param strSource +152 * string to convert to Base64 form +153 * @return Base64 string +154 * @throws EncoderException +155 * thrown if a failure condition is encountered during the encoding process. +156 */ +157 @Override +158 public String encode(final String strSource) throws EncoderException { +159 if (strSource == null) { +160 return null; +161 } +162 return encode(strSource, this.getCharset()); +163 } +164 +165 /** +166 * Decodes a Base64 string into its original form. Escaped characters are converted back to their original +167 * representation. +168 * +169 * @param value +170 * Base64 string to convert into its original form +171 * @return original string +172 * @throws DecoderException +173 * A decoder exception is thrown if a failure condition is encountered during the decode process. +174 */ +175 @Override +176 public String decode(final String value) throws DecoderException { +177 if (value == null) { +178 return null; +179 } +180 try { +181 return this.decodeText(value); +182 } catch (final UnsupportedEncodingException e) { +183 throw new DecoderException(e.getMessage(), e); +184 } +185 } +186 +187 /** +188 * Encodes an object into its Base64 form using the default Charset. Unsafe characters are escaped. +189 * +190 * @param value +191 * object to convert to Base64 form +192 * @return Base64 object +193 * @throws EncoderException +194 * thrown if a failure condition is encountered during the encoding process. 
+195 */ +196 @Override +197 public Object encode(final Object value) throws EncoderException { +198 if (value == null) { +199 return null; +200 } else if (value instanceof String) { +201 return encode((String) value); +202 } else { +203 throw new EncoderException("Objects of type " + +204 value.getClass().getName() + +205 " cannot be encoded using BCodec"); +206 } +207 } +208 +209 /** +210 * Decodes a Base64 object into its original form. Escaped characters are converted back to their original +211 * representation. +212 * +213 * @param value +214 * Base64 object to convert into its original form +215 * @return original object +216 * @throws DecoderException +217 * Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered +218 * during the decode process. +219 */ +220 @Override +221 public Object decode(final Object value) throws DecoderException { +222 if (value == null) { +223 return null; +224 } else if (value instanceof String) { +225 return decode((String) value); +226 } else { +227 throw new DecoderException("Objects of type " + +228 value.getClass().getName() + +229 " cannot be decoded using BCodec"); +230 } +231 } +232 +233 /** +234 * Gets the default Charset name used for string decoding and encoding. +235 * +236 * @return the default Charset name +237 * @since 1.7 +238 */ +239 public Charset getCharset() { +240 return this.charset; +241 } +242 +243 /** +244 * Gets the default Charset name used for string decoding and encoding. +245 * +246 * @return the default Charset name +247 */ +248 public String getDefaultCharset() { +249 return this.charset.name(); +250 } +251} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.net; +019 +020import java.nio.ByteBuffer; +021import java.util.BitSet; +022import org.apache.commons.codec.BinaryDecoder; +023import org.apache.commons.codec.BinaryEncoder; +024import org.apache.commons.codec.DecoderException; +025import org.apache.commons.codec.EncoderException; +026 +027/** +028 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of +029 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts +030 * of the URI. +031 * <p> +032 * This class is immutable. It is also thread-safe besides using BitSet which is not thread-safe, but its public +033 * interface only call the access +034 * </p> +035 * +036 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a> +037 * @since 1.12 +038 */ +039public class PercentCodec implements BinaryEncoder, BinaryDecoder { +040 +041 /** +042 * The escape character used by the Percent-Encoding in order to introduce an encoded character. 
+043 */ +044 +045 private final byte ESCAPE_CHAR = '%'; +046 +047 /** +048 * The bit set used to store the character that should be always encoded +049 */ +050 private final BitSet alwaysEncodeChars = new BitSet(); +051 +052 /** +053 * The flag defining if the space character should be encoded as '+' +054 */ +055 private final boolean plusForSpace; +056 +057 /** +058 * The minimum and maximum code of the bytes that is inserted in the bit set, used to prevent look-ups +059 */ +060 private int alwaysEncodeCharsMin = Integer.MAX_VALUE, alwaysEncodeCharsMax = Integer.MIN_VALUE; +061 +062 /** +063 * Constructs a Percent coded that will encode all the non US-ASCII characters using the Percent-Encoding +064 * while it will not encode all the US-ASCII characters, except for character '%' that is used as escape +065 * character for Percent-Encoding. +066 */ +067 public PercentCodec() { +068 this.plusForSpace = false; +069 insertAlwaysEncodeChar(ESCAPE_CHAR); +070 } +071 +072 /** +073 * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should +074 * always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that +075 * is used as escape character for Percent-Encoding. 
+076 * +077 * @param alwaysEncodeChars the unsafe characters that should always be encoded +078 * @param plusForSpace the flag defining if the space character should be encoded as '+' +079 */ +080 public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) { +081 this.plusForSpace = plusForSpace; +082 insertAlwaysEncodeChars(alwaysEncodeChars); +083 } +084 +085 /** +086 * Adds the byte array into a BitSet for faster lookup +087 * +088 * @param alwaysEncodeCharsArray +089 */ +090 private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) { +091 if (alwaysEncodeCharsArray != null) { +092 for (byte b : alwaysEncodeCharsArray) { +093 insertAlwaysEncodeChar(b); +094 } +095 } +096 insertAlwaysEncodeChar(ESCAPE_CHAR); +097 } +098 +099 /** +100 * Inserts a single character into a BitSet and maintains the min and max of the characters of the +101 * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of this range. +102 * +103 * @param b the byte that is candidate for min and max limit +104 */ +105 private void insertAlwaysEncodeChar(final byte b) { +106 this.alwaysEncodeChars.set(b); +107 if (b < alwaysEncodeCharsMin) { +108 alwaysEncodeCharsMin = b; +109 } +110 if (b > alwaysEncodeCharsMax) { +111 alwaysEncodeCharsMax = b; +112 } +113 } +114 +115 /** +116 * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the +117 * US-ASCII characters that are configured to be always encoded. 
+118 */ +119 @Override +120 public byte[] encode(final byte[] bytes) throws EncoderException { +121 if (bytes == null) { +122 return null; +123 } +124 +125 int expectedEncodingBytes = expectedEncodingBytes(bytes); +126 boolean willEncode = expectedEncodingBytes != bytes.length; +127 if (willEncode || (plusForSpace && containsSpace(bytes))) { +128 return doEncode(bytes, expectedEncodingBytes, willEncode); +129 } +130 return bytes; +131 } +132 +133 private byte[] doEncode(final byte[] bytes, int expectedLength, boolean willEncode) { +134 final ByteBuffer buffer = ByteBuffer.allocate(expectedLength); +135 for (final byte b : bytes) { +136 if (willEncode && canEncode(b)) { +137 byte bb = b; +138 if (bb < 0) { +139 bb = (byte) (256 + bb); +140 } +141 final char hex1 = Utils.hexDigit(bb >> 4); +142 final char hex2 = Utils.hexDigit(bb); +143 buffer.put(ESCAPE_CHAR); +144 buffer.put((byte) hex1); +145 buffer.put((byte) hex2); +146 } else { +147 if (plusForSpace && b == ' ') { +148 buffer.put((byte) '+'); +149 } else { +150 buffer.put(b); +151 } +152 } +153 } +154 return buffer.array(); +155 } +156 +157 private int expectedEncodingBytes(final byte[] bytes) { +158 int byteCount = 0; +159 for (final byte b : bytes) { +160 byteCount += canEncode(b) ? 3: 1; +161 } +162 return byteCount; +163 } +164 +165 private boolean containsSpace(final byte[] bytes) { +166 for (final byte b : bytes) { +167 if (b == ' ') { +168 return true; +169 } +170 } +171 return false; +172 } +173 +174 private boolean canEncode(final byte c) { +175 return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c)); +176 } +177 +178 private boolean inAlwaysEncodeCharsRange(final byte c) { +179 return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax; +180 } +181 +182 private boolean isAsciiChar(final byte c) { +183 return c >= 0; +184 } +185 +186 /** +187 * Decode bytes encoded with Percent-Encoding based on RFC 3986. 
The reverse process is performed in order to +188 * decode the encoded characters to Unicode. +189 */ +190 @Override +191 public byte[] decode(final byte[] bytes) throws DecoderException { +192 if (bytes == null) { +193 return null; +194 } +195 +196 final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes)); +197 for (int i = 0; i < bytes.length; i++) { +198 final byte b = bytes[i]; +199 if (b == ESCAPE_CHAR) { +200 try { +201 final int u = Utils.digit16(bytes[++i]); +202 final int l = Utils.digit16(bytes[++i]); +203 buffer.put((byte) ((u << 4) + l)); +204 } catch (final ArrayIndexOutOfBoundsException e) { +205 throw new DecoderException("Invalid percent decoding: ", e); +206 } +207 } else { +208 if (plusForSpace && b == '+') { +209 buffer.put((byte) ' '); +210 } else { +211 buffer.put(b); +212 } +213 } +214 } +215 return buffer.array(); +216 } +217 +218 private int expectedDecodingBytes(final byte[] bytes) { +219 int byteCount = 0; +220 for (int i = 0; i < bytes.length; ) { +221 byte b = bytes[i]; +222 i += b == ESCAPE_CHAR ? 3: 1; +223 byteCount++; +224 } +225 return byteCount; +226 } +227 +228 /** +229 * Encodes an object into using the Percent-Encoding. Only byte[] objects are accepted. +230 * +231 * @param obj the object to encode +232 * @return the encoding result byte[] as Object +233 * @throws EncoderException if the object is not a byte array +234 */ +235 @Override +236 public Object encode(final Object obj) throws EncoderException { +237 if (obj == null) { +238 return null; +239 } else if (obj instanceof byte[]) { +240 return encode((byte[]) obj); +241 } else { +242 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded"); +243 } +244 } +245 +246 /** +247 * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding. 
+248 * +249 * @param obj the object to decode +250 * @return the decoding result byte[] as Object +251 * @throws DecoderException if the object is not a byte array +252 */ +253 @Override +254 public Object decode(final Object obj) throws DecoderException { +255 if (obj == null) { +256 return null; +257 } else if (obj instanceof byte[]) { +258 return decode((byte[]) obj); +259 } else { +260 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded"); +261 } +262 } +263} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.net; +019 +020import java.io.UnsupportedEncodingException; +021import java.nio.charset.Charset; +022import java.util.BitSet; +023 +024import org.apache.commons.codec.Charsets; +025import org.apache.commons.codec.DecoderException; +026import org.apache.commons.codec.EncoderException; +027import org.apache.commons.codec.StringDecoder; +028import org.apache.commons.codec.StringEncoder; +029 +030/** +031 * Similar to the Quoted-Printable content-transfer-encoding defined in +032 * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII +033 * characters to be decipherable on an ASCII terminal without decoding. +034 * <p> +035 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII +036 * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message +037 * handling software. +038 * <p> +039 * This class is conditionally thread-safe. 
+040 * The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)} +041 * but is not volatile, and accesses are not synchronised. +042 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation +043 * is used to ensure safe publication of the value between threads, and must not invoke +044 * {@link #setEncodeBlanks(boolean)} after initial setup. +045 * +046 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message +047 * Header Extensions for Non-ASCII Text</a> +048 * +049 * @since 1.3 +050 * @version $Id$ +051 */ +052public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder { +053 /** +054 * The default Charset used for string decoding and encoding. +055 */ +056 private final Charset charset; +057 +058 /** +059 * BitSet of printable characters as defined in RFC 1522. +060 */ +061 private static final BitSet PRINTABLE_CHARS = new BitSet(256); +062 // Static initializer for printable chars collection +063 static { +064 // alpha characters +065 PRINTABLE_CHARS.set(' '); +066 PRINTABLE_CHARS.set('!'); +067 PRINTABLE_CHARS.set('"'); +068 PRINTABLE_CHARS.set('#'); +069 PRINTABLE_CHARS.set('$'); +070 PRINTABLE_CHARS.set('%'); +071 PRINTABLE_CHARS.set('&'); +072 PRINTABLE_CHARS.set('\''); +073 PRINTABLE_CHARS.set('('); +074 PRINTABLE_CHARS.set(')'); +075 PRINTABLE_CHARS.set('*'); +076 PRINTABLE_CHARS.set('+'); +077 PRINTABLE_CHARS.set(','); +078 PRINTABLE_CHARS.set('-'); +079 PRINTABLE_CHARS.set('.'); +080 PRINTABLE_CHARS.set('/'); +081 for (int i = '0'; i <= '9'; i++) { +082 PRINTABLE_CHARS.set(i); +083 } +084 PRINTABLE_CHARS.set(':'); +085 PRINTABLE_CHARS.set(';'); +086 PRINTABLE_CHARS.set('<'); +087 PRINTABLE_CHARS.set('>'); +088 PRINTABLE_CHARS.set('@'); +089 for (int i = 'A'; i <= 'Z'; i++) { +090 PRINTABLE_CHARS.set(i); +091 } +092 PRINTABLE_CHARS.set('['); +093 PRINTABLE_CHARS.set('\\'); +094 
PRINTABLE_CHARS.set(']'); +095 PRINTABLE_CHARS.set('^'); +096 PRINTABLE_CHARS.set('`'); +097 for (int i = 'a'; i <= 'z'; i++) { +098 PRINTABLE_CHARS.set(i); +099 } +100 PRINTABLE_CHARS.set('{'); +101 PRINTABLE_CHARS.set('|'); +102 PRINTABLE_CHARS.set('}'); +103 PRINTABLE_CHARS.set('~'); +104 } +105 +106 private static final byte SPACE = 32; +107 +108 private static final byte UNDERSCORE = 95; +109 +110 private boolean encodeBlanks = false; +111 +112 /** +113 * Default constructor. +114 */ +115 public QCodec() { +116 this(Charsets.UTF_8); +117 } +118 +119 /** +120 * Constructor which allows for the selection of a default Charset. +121 * +122 * @param charset +123 * the default string Charset to use. +124 * +125 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +126 * @since 1.7 +127 */ +128 public QCodec(final Charset charset) { +129 super(); +130 this.charset = charset; +131 } +132 +133 /** +134 * Constructor which allows for the selection of a default Charset. +135 * +136 * @param charsetName +137 * the Charset to use. 
+138 * @throws java.nio.charset.UnsupportedCharsetException +139 * If the named Charset is unavailable +140 * @since 1.7 throws UnsupportedCharsetException if the named Charset is unavailable +141 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> +142 */ +143 public QCodec(final String charsetName) { +144 this(Charset.forName(charsetName)); +145 } +146 +147 @Override +148 protected String getEncoding() { +149 return "Q"; +150 } +151 +152 @Override +153 protected byte[] doEncoding(final byte[] bytes) { +154 if (bytes == null) { +155 return null; +156 } +157 final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes); +158 if (this.encodeBlanks) { +159 for (int i = 0; i < data.length; i++) { +160 if (data[i] == SPACE) { +161 data[i] = UNDERSCORE; +162 } +163 } +164 } +165 return data; +166 } +167 +168 @Override +169 protected byte[] doDecoding(final byte[] bytes) throws DecoderException { +170 if (bytes == null) { +171 return null; +172 } +173 boolean hasUnderscores = false; +174 for (final byte b : bytes) { +175 if (b == UNDERSCORE) { +176 hasUnderscores = true; +177 break; +178 } +179 } +180 if (hasUnderscores) { +181 final byte[] tmp = new byte[bytes.length]; +182 for (int i = 0; i < bytes.length; i++) { +183 final byte b = bytes[i]; +184 if (b != UNDERSCORE) { +185 tmp[i] = b; +186 } else { +187 tmp[i] = SPACE; +188 } +189 } +190 return QuotedPrintableCodec.decodeQuotedPrintable(tmp); +191 } +192 return QuotedPrintableCodec.decodeQuotedPrintable(bytes); +193 } +194 +195 /** +196 * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped. 
+197 * +198 * @param sourceStr +199 * string to convert to quoted-printable form +200 * @param sourceCharset +201 * the Charset for sourceStr +202 * @return quoted-printable string +203 * @throws EncoderException +204 * thrown if a failure condition is encountered during the encoding process. +205 * @since 1.7 +206 */ +207 public String encode(final String sourceStr, final Charset sourceCharset) throws EncoderException { +208 if (sourceStr == null) { +209 return null; +210 } +211 return encodeText(sourceStr, sourceCharset); +212 } +213 +214 /** +215 * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped. +216 * +217 * @param sourceStr +218 * string to convert to quoted-printable form +219 * @param sourceCharset +220 * the Charset for sourceStr +221 * @return quoted-printable string +222 * @throws EncoderException +223 * thrown if a failure condition is encountered during the encoding process. +224 */ +225 public String encode(final String sourceStr, final String sourceCharset) throws EncoderException { +226 if (sourceStr == null) { +227 return null; +228 } +229 try { +230 return encodeText(sourceStr, sourceCharset); +231 } catch (final UnsupportedEncodingException e) { +232 throw new EncoderException(e.getMessage(), e); +233 } +234 } +235 +236 /** +237 * Encodes a string into its quoted-printable form using the default Charset. Unsafe characters are escaped. +238 * +239 * @param sourceStr +240 * string to convert to quoted-printable form +241 * @return quoted-printable string +242 * @throws EncoderException +243 * thrown if a failure condition is encountered during the encoding process. +244 */ +245 @Override +246 public String encode(final String sourceStr) throws EncoderException { +247 if (sourceStr == null) { +248 return null; +249 } +250 return encode(sourceStr, getCharset()); +251 } +252 +253 /** +254 * Decodes a quoted-printable string into its original form. 
Escaped characters are converted back to their original +255 * representation. +256 * +257 * @param str +258 * quoted-printable string to convert into its original form +259 * @return original string +260 * @throws DecoderException +261 * A decoder exception is thrown if a failure condition is encountered during the decode process. +262 */ +263 @Override +264 public String decode(final String str) throws DecoderException { +265 if (str == null) { +266 return null; +267 } +268 try { +269 return decodeText(str); +270 } catch (final UnsupportedEncodingException e) { +271 throw new DecoderException(e.getMessage(), e); +272 } +273 } +274 +275 /** +276 * Encodes an object into its quoted-printable form using the default Charset. Unsafe characters are escaped. +277 * +278 * @param obj +279 * object to convert to quoted-printable form +280 * @return quoted-printable object +281 * @throws EncoderException +282 * thrown if a failure condition is encountered during the encoding process. +283 */ +284 @Override +285 public Object encode(final Object obj) throws EncoderException { +286 if (obj == null) { +287 return null; +288 } else if (obj instanceof String) { +289 return encode((String) obj); +290 } else { +291 throw new EncoderException("Objects of type " + +292 obj.getClass().getName() + +293 " cannot be encoded using Q codec"); +294 } +295 } +296 +297 /** +298 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original +299 * representation. +300 * +301 * @param obj +302 * quoted-printable object to convert into its original form +303 * @return original object +304 * @throws DecoderException +305 * Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered +306 * during the decode process. 
+307 */ +308 @Override +309 public Object decode(final Object obj) throws DecoderException { +310 if (obj == null) { +311 return null; +312 } else if (obj instanceof String) { +313 return decode((String) obj); +314 } else { +315 throw new DecoderException("Objects of type " + +316 obj.getClass().getName() + +317 " cannot be decoded using Q codec"); +318 } +319 } +320 +321 /** +322 * Gets the default Charset name used for string decoding and encoding. +323 * +324 * @return the default Charset name +325 * @since 1.7 +326 */ +327 public Charset getCharset() { +328 return this.charset; +329 } +330 +331 /** +332 * Gets the default Charset name used for string decoding and encoding. +333 * +334 * @return the default Charset name +335 */ +336 public String getDefaultCharset() { +337 return this.charset.name(); +338 } +339 +340 /** +341 * Tests if optional transformation of SPACE characters is to be used +342 * +343 * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise +344 */ +345 public boolean isEncodeBlanks() { +346 return this.encodeBlanks; +347 } +348 +349 /** +350 * Defines whether optional transformation of SPACE characters is to be used +351 * +352 * @param b +353 * <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise +354 */ +355 public void setEncodeBlanks(final boolean b) { +356 this.encodeBlanks = b; +357 } +358} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.net; +019 +020import java.io.ByteArrayOutputStream; +021import java.io.UnsupportedEncodingException; +022import java.nio.charset.Charset; +023import java.nio.charset.IllegalCharsetNameException; +024import java.nio.charset.UnsupportedCharsetException; +025import java.util.BitSet; +026 +027import org.apache.commons.codec.BinaryDecoder; +028import org.apache.commons.codec.BinaryEncoder; +029import org.apache.commons.codec.Charsets; +030import org.apache.commons.codec.DecoderException; +031import org.apache.commons.codec.EncoderException; +032import org.apache.commons.codec.StringDecoder; +033import org.apache.commons.codec.StringEncoder; +034import org.apache.commons.codec.binary.StringUtils; +035 +036/** +037 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>. +038 * <p> +039 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to +040 * printable characters in the ASCII character set. 
It encodes the data in such a way that the resulting octets are +041 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the +042 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable +043 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping +044 * gateway. +045 * <p> +046 * Note: +047 * <p> +048 * Depending on the selected {@code strict} parameter, this class will implement a different set of rules of the +049 * quoted-printable spec: +050 * <ul> +051 * <li>{@code strict=false}: only rules #1 and #2 are implemented +052 * <li>{@code strict=true}: all rules #1 through #5 are implemented +053 * </ul> +054 * Originally, this class only supported the non-strict mode, but the codec in this partial form could already be used +055 * for certain applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance +056 * Q codec. The strict mode has been added in 1.10. +057 * <p> +058 * This class is immutable and thread-safe. +059 * +060 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: +061 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> +062 * +063 * @since 1.3 +064 * @version $Id$ +065 */ +066public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { +067 /** +068 * The default Charset used for string decoding and encoding. +069 */ +070 private final Charset charset; +071 +072 /** +073 * Indicates whether soft line breaks shall be used during encoding (rule #3-5). +074 */ +075 private final boolean strict; +076 +077 /** +078 * BitSet of printable characters as defined in RFC 1521. 
+079 */ +080 private static final BitSet PRINTABLE_CHARS = new BitSet(256); +081 +082 private static final byte ESCAPE_CHAR = '='; +083 +084 private static final byte TAB = 9; +085 +086 private static final byte SPACE = 32; +087 +088 private static final byte CR = 13; +089 +090 private static final byte LF = 10; +091 +092 /** +093 * Safe line length for quoted printable encoded text. +094 */ +095 private static final int SAFE_LENGTH = 73; +096 +097 // Static initializer for printable chars collection +098 static { +099 // alpha characters +100 for (int i = 33; i <= 60; i++) { +101 PRINTABLE_CHARS.set(i); +102 } +103 for (int i = 62; i <= 126; i++) { +104 PRINTABLE_CHARS.set(i); +105 } +106 PRINTABLE_CHARS.set(TAB); +107 PRINTABLE_CHARS.set(SPACE); +108 } +109 +110 /** +111 * Default constructor, assumes default Charset of {@link Charsets#UTF_8} +112 */ +113 public QuotedPrintableCodec() { +114 this(Charsets.UTF_8, false); +115 } +116 +117 /** +118 * Constructor which allows for the selection of the strict mode. +119 * +120 * @param strict +121 * if {@code true}, soft line breaks will be used +122 * @since 1.10 +123 */ +124 public QuotedPrintableCodec(final boolean strict) { +125 this(Charsets.UTF_8, strict); +126 } +127 +128 /** +129 * Constructor which allows for the selection of a default Charset. +130 * +131 * @param charset +132 * the default string Charset to use. +133 * @since 1.7 +134 */ +135 public QuotedPrintableCodec(final Charset charset) { +136 this(charset, false); +137 } +138 +139 /** +140 * Constructor which allows for the selection of a default Charset and strict mode. +141 * +142 * @param charset +143 * the default string Charset to use. 
+144 * @param strict +145 * if {@code true}, soft line breaks will be used +146 * @since 1.10 +147 */ +148 public QuotedPrintableCodec(final Charset charset, final boolean strict) { +149 this.charset = charset; +150 this.strict = strict; +151 } +152 +153 /** +154 * Constructor which allows for the selection of a default Charset. +155 * +156 * @param charsetName +157 * the default string Charset to use. +158 * @throws UnsupportedCharsetException +159 * If no support for the named Charset is available +160 * in this instance of the Java virtual machine +161 * @throws IllegalArgumentException +162 * If the given charsetName is null +163 * @throws IllegalCharsetNameException +164 * If the given Charset name is illegal +165 * +166 * @since 1.7 throws UnsupportedCharsetException if the named Charset is unavailable +167 */ +168 public QuotedPrintableCodec(final String charsetName) +169 throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException { +170 this(Charset.forName(charsetName), false); +171 } +172 +173 /** +174 * Encodes byte into its quoted-printable representation. +175 * +176 * @param b +177 * byte to encode +178 * @param buffer +179 * the buffer to write to +180 * @return The number of bytes written to the <code>buffer</code> +181 */ +182 private static final int encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) { +183 buffer.write(ESCAPE_CHAR); +184 final char hex1 = Utils.hexDigit(b >> 4); +185 final char hex2 = Utils.hexDigit(b); +186 buffer.write(hex1); +187 buffer.write(hex2); +188 return 3; +189 } +190 +191 /** +192 * Return the byte at position <code>index</code> of the byte array and +193 * make sure it is unsigned. 
+194 * +195 * @param index +196 * position in the array +197 * @param bytes +198 * the byte array +199 * @return the unsigned octet at position <code>index</code> from the array +200 */ +201 private static int getUnsignedOctet(final int index, final byte[] bytes) { +202 int b = bytes[index]; +203 if (b < 0) { +204 b = 256 + b; +205 } +206 return b; +207 } +208 +209 /** +210 * Write a byte to the buffer. +211 * +212 * @param b +213 * byte to write +214 * @param encode +215 * indicates whether the octet shall be encoded +216 * @param buffer +217 * the buffer to write to +218 * @return the number of bytes that have been written to the buffer +219 */ +220 private static int encodeByte(final int b, final boolean encode, +221 final ByteArrayOutputStream buffer) { +222 if (encode) { +223 return encodeQuotedPrintable(b, buffer); +224 } +225 buffer.write(b); +226 return 1; +227 } +228 +229 /** +230 * Checks whether the given byte is whitespace. +231 * +232 * @param b +233 * byte to be checked +234 * @return <code>true</code> if the byte is either a space or tab character +235 */ +236 private static boolean isWhitespace(final int b) { +237 return b == SPACE || b == TAB; +238 } +239 +240 /** +241 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. +242 * <p> +243 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +244 * RFC 1521 and is suitable for encoding binary data and unformatted text. 
+245 * +246 * @param printable +247 * bitset of characters deemed quoted-printable +248 * @param bytes +249 * array of bytes to be encoded +250 * @return array of bytes containing quoted-printable data +251 */ +252 public static final byte[] encodeQuotedPrintable(final BitSet printable, final byte[] bytes) { +253 return encodeQuotedPrintable(printable, bytes, false); +254 } +255 +256 /** +257 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. +258 * <p> +259 * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset +260 * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +261 * RFC 1521 and is suitable for encoding binary data and unformatted text. +262 * +263 * @param printable +264 * bitset of characters deemed quoted-printable +265 * @param bytes +266 * array of bytes to be encoded +267 * @param strict +268 * if {@code true} the full ruleset is used, otherwise only rule #1 and rule #2 +269 * @return array of bytes containing quoted-printable data +270 * @since 1.10 +271 */ +272 public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes, final boolean strict) { +273 if (bytes == null) { +274 return null; +275 } +276 if (printable == null) { +277 printable = PRINTABLE_CHARS; +278 } +279 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); +280 +281 if (strict) { +282 int pos = 1; +283 // encode up to buffer.length - 3, the last three octets will be treated +284 // separately for simplification of note #3 +285 for (int i = 0; i < bytes.length - 3; i++) { +286 final int b = getUnsignedOctet(i, bytes); +287 if (pos < SAFE_LENGTH) { +288 // up to this length it is safe to add any byte, encoded or not +289 pos += encodeByte(b, !printable.get(b), buffer); +290 } else { +291 // rule #3: whitespace at the end of a line *must* be encoded +292 encodeByte(b, !printable.get(b) || 
isWhitespace(b), buffer); +293 +294 // rule #5: soft line break +295 buffer.write(ESCAPE_CHAR); +296 buffer.write(CR); +297 buffer.write(LF); +298 pos = 1; +299 } +300 } +301 +302 // rule #3: whitespace at the end of a line *must* be encoded +303 // if we would do a soft break line after this octet, encode whitespace +304 int b = getUnsignedOctet(bytes.length - 3, bytes); +305 boolean encode = !printable.get(b) || (isWhitespace(b) && pos > SAFE_LENGTH - 5); +306 pos += encodeByte(b, encode, buffer); +307 +308 // note #3: '=' *must not* be the ultimate or penultimate character +309 // simplification: if < 6 bytes left, do a soft line break as we may need +310 // exactly 6 bytes space for the last 2 bytes +311 if (pos > SAFE_LENGTH - 2) { +312 buffer.write(ESCAPE_CHAR); +313 buffer.write(CR); +314 buffer.write(LF); +315 } +316 for (int i = bytes.length - 2; i < bytes.length; i++) { +317 b = getUnsignedOctet(i, bytes); +318 // rule #3: trailing whitespace shall be encoded +319 encode = !printable.get(b) || (i > bytes.length - 2 && isWhitespace(b)); +320 encodeByte(b, encode, buffer); +321 } +322 } else { +323 for (final byte c : bytes) { +324 int b = c; +325 if (b < 0) { +326 b = 256 + b; +327 } +328 if (printable.get(b)) { +329 buffer.write(b); +330 } else { +331 encodeQuotedPrintable(b, buffer); +332 } +333 } +334 } +335 return buffer.toByteArray(); +336 } +337 +338 /** +339 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted +340 * back to their original representation. +341 * <p> +342 * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as +343 * defined in RFC 1521. 
+344 * +345 * @param bytes +346 * array of quoted-printable characters +347 * @return array of original bytes +348 * @throws DecoderException +349 * Thrown if quoted-printable decoding is unsuccessful +350 */ +351 public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException { +352 if (bytes == null) { +353 return null; +354 } +355 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); +356 for (int i = 0; i < bytes.length; i++) { +357 final int b = bytes[i]; +358 if (b == ESCAPE_CHAR) { +359 try { +360 // if the next octet is a CR we have found a soft line break +361 if (bytes[++i] == CR) { +362 continue; +363 } +364 final int u = Utils.digit16(bytes[i]); +365 final int l = Utils.digit16(bytes[++i]); +366 buffer.write((char) ((u << 4) + l)); +367 } catch (final ArrayIndexOutOfBoundsException e) { +368 throw new DecoderException("Invalid quoted-printable encoding", e); +369 } +370 } else if (b != CR && b != LF) { +371 // every other octet is appended except for CR & LF +372 buffer.write(b); +373 } +374 } +375 return buffer.toByteArray(); +376 } +377 +378 /** +379 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. +380 * <p> +381 * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset +382 * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +383 * RFC 1521 and is suitable for encoding binary data and unformatted text. +384 * +385 * @param bytes +386 * array of bytes to be encoded +387 * @return array of bytes containing quoted-printable data +388 */ +389 @Override +390 public byte[] encode(final byte[] bytes) { +391 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes, strict); +392 } +393 +394 /** +395 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted +396 * back to their original representation. 
+397 * <p> +398 * This function fully implements the quoted-printable encoding specification (rule #1 through rule #5) as +399 * defined in RFC 1521. +400 * +401 * @param bytes +402 * array of quoted-printable characters +403 * @return array of original bytes +404 * @throws DecoderException +405 * Thrown if quoted-printable decoding is unsuccessful +406 */ +407 @Override +408 public byte[] decode(final byte[] bytes) throws DecoderException { +409 return decodeQuotedPrintable(bytes); +410 } +411 +412 /** +413 * Encodes a string into its quoted-printable form using the default string Charset. Unsafe characters are escaped. +414 * <p> +415 * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset +416 * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +417 * RFC 1521 and is suitable for encoding binary data and unformatted text. +418 * +419 * @param sourceStr +420 * string to convert to quoted-printable form +421 * @return quoted-printable string +422 * @throws EncoderException +423 * Thrown if quoted-printable encoding is unsuccessful +424 * +425 * @see #getCharset() +426 */ +427 @Override +428 public String encode(final String sourceStr) throws EncoderException { +429 return this.encode(sourceStr, getCharset()); +430 } +431 +432 /** +433 * Decodes a quoted-printable string into its original form using the specified string Charset. Escaped characters +434 * are converted back to their original representation. 
+435 * +436 * @param sourceStr +437 * quoted-printable string to convert into its original form +438 * @param sourceCharset +439 * the original string Charset +440 * @return original string +441 * @throws DecoderException +442 * Thrown if quoted-printable decoding is unsuccessful +443 * @since 1.7 +444 */ +445 public String decode(final String sourceStr, final Charset sourceCharset) throws DecoderException { +446 if (sourceStr == null) { +447 return null; +448 } +449 return new String(this.decode(StringUtils.getBytesUsAscii(sourceStr)), sourceCharset); +450 } +451 +452 /** +453 * Decodes a quoted-printable string into its original form using the specified string Charset. Escaped characters +454 * are converted back to their original representation. +455 * +456 * @param sourceStr +457 * quoted-printable string to convert into its original form +458 * @param sourceCharset +459 * the original string Charset +460 * @return original string +461 * @throws DecoderException +462 * Thrown if quoted-printable decoding is unsuccessful +463 * @throws UnsupportedEncodingException +464 * Thrown if Charset is not supported +465 */ +466 public String decode(final String sourceStr, final String sourceCharset) throws DecoderException, UnsupportedEncodingException { +467 if (sourceStr == null) { +468 return null; +469 } +470 return new String(decode(StringUtils.getBytesUsAscii(sourceStr)), sourceCharset); +471 } +472 +473 /** +474 * Decodes a quoted-printable string into its original form using the default string Charset. Escaped characters are +475 * converted back to their original representation. +476 * +477 * @param sourceStr +478 * quoted-printable string to convert into its original form +479 * @return original string +480 * @throws DecoderException +481 * Thrown if quoted-printable decoding is unsuccessful. Thrown if Charset is not supported. 
+482 * @see #getCharset() +483 */ +484 @Override +485 public String decode(final String sourceStr) throws DecoderException { +486 return this.decode(sourceStr, this.getCharset()); +487 } +488 +489 /** +490 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. +491 * +492 * @param obj +493 * string to convert to a quoted-printable form +494 * @return quoted-printable object +495 * @throws EncoderException +496 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is +497 * unsuccessful +498 */ +499 @Override +500 public Object encode(final Object obj) throws EncoderException { +501 if (obj == null) { +502 return null; +503 } else if (obj instanceof byte[]) { +504 return encode((byte[]) obj); +505 } else if (obj instanceof String) { +506 return encode((String) obj); +507 } else { +508 throw new EncoderException("Objects of type " + +509 obj.getClass().getName() + +510 " cannot be quoted-printable encoded"); +511 } +512 } +513 +514 /** +515 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original +516 * representation. +517 * +518 * @param obj +519 * quoted-printable object to convert into its original form +520 * @return original object +521 * @throws DecoderException +522 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure +523 * condition is encountered during the decode process. 
+524 */ +525 @Override +526 public Object decode(final Object obj) throws DecoderException { +527 if (obj == null) { +528 return null; +529 } else if (obj instanceof byte[]) { +530 return decode((byte[]) obj); +531 } else if (obj instanceof String) { +532 return decode((String) obj); +533 } else { +534 throw new DecoderException("Objects of type " + +535 obj.getClass().getName() + +536 " cannot be quoted-printable decoded"); +537 } +538 } +539 +540 /** +541 * Gets the default Charset name used for string decoding and encoding. +542 * +543 * @return the default Charset name +544 * @since 1.7 +545 */ +546 public Charset getCharset() { +547 return this.charset; +548 } +549 +550 /** +551 * Gets the default Charset name used for string decoding and encoding. +552 * +553 * @return the default Charset name +554 */ +555 public String getDefaultCharset() { +556 return this.charset.name(); +557 } +558 +559 /** +560 * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped. +561 * <p> +562 * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset +563 * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +564 * RFC 1521 and is suitable for encoding binary data and unformatted text. +565 * +566 * @param sourceStr +567 * string to convert to quoted-printable form +568 * @param sourceCharset +569 * the Charset for sourceStr +570 * @return quoted-printable string +571 * @since 1.7 +572 */ +573 public String encode(final String sourceStr, final Charset sourceCharset) { +574 if (sourceStr == null) { +575 return null; +576 } +577 return StringUtils.newStringUsAscii(this.encode(sourceStr.getBytes(sourceCharset))); +578 } +579 +580 /** +581 * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped. 
+582 * <p> +583 * Depending on the selection of the {@code strict} parameter, this function either implements the full ruleset +584 * or only a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in +585 * RFC 1521 and is suitable for encoding binary data and unformatted text. +586 * +587 * @param sourceStr +588 * string to convert to quoted-printable form +589 * @param sourceCharset +590 * the Charset for sourceStr +591 * @return quoted-printable string +592 * @throws UnsupportedEncodingException +593 * Thrown if the Charset is not supported +594 */ +595 public String encode(final String sourceStr, final String sourceCharset) throws UnsupportedEncodingException { +596 if (sourceStr == null) { +597 return null; +598 } +599 return StringUtils.newStringUsAscii(encode(sourceStr.getBytes(sourceCharset))); +600 } +601} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++
001/* +002 * Licensed to the Apache Software Foundation (ASF) under one or more +003 * contributor license agreements. See the NOTICE file distributed with +004 * this work for additional information regarding copyright ownership. +005 * The ASF licenses this file to You under the Apache License, Version 2.0 +006 * (the "License"); you may not use this file except in compliance with +007 * the License. You may obtain a copy of the License at +008 * +009 * http://www.apache.org/licenses/LICENSE-2.0 +010 * +011 * Unless required by applicable law or agreed to in writing, software +012 * distributed under the License is distributed on an "AS IS" BASIS, +013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +014 * See the License for the specific language governing permissions and +015 * limitations under the License. +016 */ +017 +018package org.apache.commons.codec.net; +019 +020import java.io.ByteArrayOutputStream; +021import java.io.UnsupportedEncodingException; +022import java.util.BitSet; +023 +024import org.apache.commons.codec.BinaryDecoder; +025import org.apache.commons.codec.BinaryEncoder; +026import org.apache.commons.codec.CharEncoding; +027import org.apache.commons.codec.DecoderException; +028import org.apache.commons.codec.EncoderException; +029import org.apache.commons.codec.StringDecoder; +030import org.apache.commons.codec.StringEncoder; +031import org.apache.commons.codec.binary.StringUtils; +032 +033/** +034 * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding. +035 * <p> +036 * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and +037 * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below +038 * 1.4 rely on the platform's default charset encoding. 
+039 * <p> +040 * This class is thread-safe since 1.11 +041 * +042 * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a> +043 * of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a> +044 * +045 * @since 1.2 +046 * @version $Id$ +047 */ +048public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { +049 +050 /** +051 * The default charset used for string decoding and encoding. +052 * +053 * @deprecated TODO: This field will be changed to a private final Charset in 2.0. (CODEC-126) +054 */ +055 @Deprecated +056 protected volatile String charset; // added volatile: see CODEC-232 +057 +058 /** +059 * Release 1.5 made this field final. +060 */ +061 protected static final byte ESCAPE_CHAR = '%'; +062 +063 /** +064 * BitSet of www-form-url safe characters. +065 * This is a copy of the internal BitSet which is now used for the conversion. +066 * Changes to this field are ignored. +067 * @deprecated 1.11 Will be removed in 2.0 (CODEC-230) +068 */ +069 @Deprecated +070 protected static final BitSet WWW_FORM_URL; +071 +072 private static final BitSet WWW_FORM_URL_SAFE = new BitSet(256); +073 +074 // Static initializer for www_form_url +075 static { +076 // alpha characters +077 for (int i = 'a'; i <= 'z'; i++) { +078 WWW_FORM_URL_SAFE.set(i); +079 } +080 for (int i = 'A'; i <= 'Z'; i++) { +081 WWW_FORM_URL_SAFE.set(i); +082 } +083 // numeric characters +084 for (int i = '0'; i <= '9'; i++) { +085 WWW_FORM_URL_SAFE.set(i); +086 } +087 // special chars +088 WWW_FORM_URL_SAFE.set('-'); +089 WWW_FORM_URL_SAFE.set('_'); +090 WWW_FORM_URL_SAFE.set('.'); +091 WWW_FORM_URL_SAFE.set('*'); +092 // blank to be replaced with + +093 WWW_FORM_URL_SAFE.set(' '); +094 +095 // Create a copy in case anyone (ab)uses it +096 WWW_FORM_URL = (BitSet) WWW_FORM_URL_SAFE.clone(); +097 } +098 +099 +100 /** +101 * Default constructor. 
+102 */ +103 public URLCodec() { +104 this(CharEncoding.UTF_8); +105 } +106 +107 /** +108 * Constructor which allows for the selection of a default charset. +109 * +110 * @param charset the default string charset to use. +111 */ +112 public URLCodec(final String charset) { +113 super(); +114 this.charset = charset; +115 } +116 +117 /** +118 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. +119 * +120 * @param urlsafe +121 * bitset of characters deemed URL safe +122 * @param bytes +123 * array of bytes to convert to URL safe characters +124 * @return array of bytes containing URL safe characters +125 */ +126 public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) { +127 if (bytes == null) { +128 return null; +129 } +130 if (urlsafe == null) { +131 urlsafe = WWW_FORM_URL_SAFE; +132 } +133 +134 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); +135 for (final byte c : bytes) { +136 int b = c; +137 if (b < 0) { +138 b = 256 + b; +139 } +140 if (urlsafe.get(b)) { +141 if (b == ' ') { +142 b = '+'; +143 } +144 buffer.write(b); +145 } else { +146 buffer.write(ESCAPE_CHAR); +147 final char hex1 = Utils.hexDigit(b >> 4); +148 final char hex2 = Utils.hexDigit(b); +149 buffer.write(hex1); +150 buffer.write(hex2); +151 } +152 } +153 return buffer.toByteArray(); +154 } +155 +156 /** +157 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted +158 * back to their original representation. 
+159 * +160 * @param bytes +161 * array of URL safe characters +162 * @return array of original bytes +163 * @throws DecoderException +164 * Thrown if URL decoding is unsuccessful +165 */ +166 public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException { +167 if (bytes == null) { +168 return null; +169 } +170 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); +171 for (int i = 0; i < bytes.length; i++) { +172 final int b = bytes[i]; +173 if (b == '+') { +174 buffer.write(' '); +175 } else if (b == ESCAPE_CHAR) { +176 try { +177 final int u = Utils.digit16(bytes[++i]); +178 final int l = Utils.digit16(bytes[++i]); +179 buffer.write((char) ((u << 4) + l)); +180 } catch (final ArrayIndexOutOfBoundsException e) { +181 throw new DecoderException("Invalid URL encoding: ", e); +182 } +183 } else { +184 buffer.write(b); +185 } +186 } +187 return buffer.toByteArray(); +188 } +189 +190 /** +191 * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped. +192 * +193 * @param bytes +194 * array of bytes to convert to URL safe characters +195 * @return array of bytes containing URL safe characters +196 */ +197 @Override +198 public byte[] encode(final byte[] bytes) { +199 return encodeUrl(WWW_FORM_URL_SAFE, bytes); +200 } +201 +202 +203 /** +204 * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted +205 * back to their original representation. +206 * +207 * @param bytes +208 * array of URL safe characters +209 * @return array of original bytes +210 * @throws DecoderException +211 * Thrown if URL decoding is unsuccessful +212 */ +213 @Override +214 public byte[] decode(final byte[] bytes) throws DecoderException { +215 return decodeUrl(bytes); +216 } +217 +218 /** +219 * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped. 
+220 * +221 * @param str +222 * string to convert to a URL safe form +223 * @param charsetName +224 * the charset for str +225 * @return URL safe string +226 * @throws UnsupportedEncodingException +227 * Thrown if charset is not supported +228 */ +229 public String encode(final String str, final String charsetName) throws UnsupportedEncodingException { +230 if (str == null) { +231 return null; +232 } +233 return StringUtils.newStringUsAscii(encode(str.getBytes(charsetName))); +234 } +235 +236 /** +237 * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped. +238 * +239 * @param str +240 * string to convert to a URL safe form +241 * @return URL safe string +242 * @throws EncoderException +243 * Thrown if URL encoding is unsuccessful +244 * +245 * @see #getDefaultCharset() +246 */ +247 @Override +248 public String encode(final String str) throws EncoderException { +249 if (str == null) { +250 return null; +251 } +252 try { +253 return encode(str, getDefaultCharset()); +254 } catch (final UnsupportedEncodingException e) { +255 throw new EncoderException(e.getMessage(), e); +256 } +257 } +258 +259 +260 /** +261 * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted +262 * back to their original representation. 
+263 * +264 * @param str +265 * URL safe string to convert into its original form +266 * @param charsetName +267 * the original string charset +268 * @return original string +269 * @throws DecoderException +270 * Thrown if URL decoding is unsuccessful +271 * @throws UnsupportedEncodingException +272 * Thrown if charset is not supported +273 */ +274 public String decode(final String str, final String charsetName) +275 throws DecoderException, UnsupportedEncodingException { +276 if (str == null) { +277 return null; +278 } +279 return new String(decode(StringUtils.getBytesUsAscii(str)), charsetName); +280 } +281 +282 /** +283 * Decodes a URL safe string into its original form using the default string charset. Escaped characters are +284 * converted back to their original representation. +285 * +286 * @param str +287 * URL safe string to convert into its original form +288 * @return original string +289 * @throws DecoderException +290 * Thrown if URL decoding is unsuccessful +291 * @see #getDefaultCharset() +292 */ +293 @Override +294 public String decode(final String str) throws DecoderException { +295 if (str == null) { +296 return null; +297 } +298 try { +299 return decode(str, getDefaultCharset()); +300 } catch (final UnsupportedEncodingException e) { +301 throw new DecoderException(e.getMessage(), e); +302 } +303 } +304 +305 /** +306 * Encodes an object into its URL safe form. Unsafe characters are escaped. 
+307 * +308 * @param obj +309 * string to convert to a URL safe form +310 * @return URL safe object +311 * @throws EncoderException +312 * Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful +313 */ +314 @Override +315 public Object encode(final Object obj) throws EncoderException { +316 if (obj == null) { +317 return null; +318 } else if (obj instanceof byte[]) { +319 return encode((byte[])obj); +320 } else if (obj instanceof String) { +321 return encode((String)obj); +322 } else { +323 throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded"); +324 +325 } +326 } +327 +328 /** +329 * Decodes a URL safe object into its original form. Escaped characters are converted back to their original +330 * representation. +331 * +332 * @param obj +333 * URL safe object to convert into its original form +334 * @return original object +335 * @throws DecoderException +336 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure +337 * condition is encountered during the decode process. +338 */ +339 @Override +340 public Object decode(final Object obj) throws DecoderException { +341 if (obj == null) { +342 return null; +343 } else if (obj instanceof byte[]) { +344 return decode((byte[]) obj); +345 } else if (obj instanceof String) { +346 return decode((String) obj); +347 } else { +348 throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded"); +349 +350 } +351 } +352 +353 /** +354 * The default charset used for string decoding and encoding. +355 * +356 * @return the default string charset. +357 */ +358 public String getDefaultCharset() { +359 return this.charset; +360 } +361 +362 /** +363 * The <code>String</code> encoding used for decoding and encoding. +364 * +365 * @return Returns the encoding. +366 * +367 * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0. 
+368 */ +369 @Deprecated +370 public String getEncoding() { +371 return this.charset; +372 } +373 +374} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ++