@@ -497,7 +497,7 @@ public XmlStreamReader(final InputStream inputStream, final boolean lenient, fin
497497 final BOMInputStream bom = new BOMInputStream (new BufferedInputStream (Objects .requireNonNull (inputStream , "inputStream" ), IOUtils .DEFAULT_BUFFER_SIZE ),
498498 false , BOMS );
499499 final BOMInputStream pis = new BOMInputStream (bom , true , XML_GUESS_BYTES );
500- this .encoding = processHttpStream (bom , pis , lenient );
500+ this .encoding = toEncoding (bom , pis , lenient );
501501 this .reader = new InputStreamReader (pis , encoding );
502502 }
503503
@@ -602,7 +602,7 @@ public XmlStreamReader(final InputStream inputStream, final String httpContentTy
602602 final BOMInputStream bom = new BOMInputStream (new BufferedInputStream (Objects .requireNonNull (inputStream , "inputStream" ), IOUtils .DEFAULT_BUFFER_SIZE ),
603603 false , BOMS );
604604 final BOMInputStream pis = new BOMInputStream (bom , true , XML_GUESS_BYTES );
605- this .encoding = processHttpStream (bom , pis , lenient , httpContentType );
605+ this .encoding = toEncoding (bom , pis , lenient , httpContentType );
606606 this .reader = new InputStreamReader (pis , encoding );
607607 }
608608
@@ -686,13 +686,134 @@ public XmlStreamReader(final URLConnection urlConnection, final String defaultEn
686686 .get ();
687687 // @formatter:on
688688 if (urlConnection instanceof HttpURLConnection || contentType != null ) {
689- this .encoding = processHttpStream (bomInput , piInput , lenient , contentType );
689+ this .encoding = toEncoding (bomInput , piInput , lenient , contentType );
690690 } else {
691- this .encoding = processHttpStream (bomInput , piInput , lenient );
691+ this .encoding = toEncoding (bomInput , piInput , lenient );
692692 }
693693 this .reader = new InputStreamReader (piInput , encoding );
694694 }
695695
696+ /**
697+ * Closes the XmlStreamReader stream.
698+ *
699+ * @throws IOException thrown if there was a problem closing the stream.
700+ */
701+ @ Override
702+ public void close () throws IOException {
703+ reader .close ();
704+ }
705+
706+ /**
707+ * Gets the default encoding to use if none is set in the HTTP content-type or the XML prolog and the rules based on content-type are not adequate.
708+ * <p>
709+ * If it is {@code null} the content-type based rules are used.
710+ * </p>
711+ *
712+ * @return the default encoding to use.
713+ */
714+ public String getDefaultEncoding () {
715+ return defaultEncoding ;
716+ }
717+
718+ /**
719+ * Gets the charset encoding of the XmlStreamReader.
720+ *
721+ * @return charset encoding.
722+ */
723+ public String getEncoding () {
724+ return encoding ;
725+ }
726+
727+ /**
728+ * Invokes the underlying reader's {@code read(char[], int, int)} method.
729+ *
730+ * @param buf the buffer to read the characters into.
731+ * @param offset The start offset.
732+ * @param len The maximum number of characters to read.
733+ * @return the number of characters read or -1 if the end of stream.
734+ * @throws IOException if an I/O error occurs.
735+ */
736+ @ Override
737+ public int read (final char [] buf , final int offset , final int len ) throws IOException {
738+ return reader .read (buf , offset , len );
739+ }
740+
741+ /**
742+ * Processes the raw stream.
743+ *
744+ * @param bomInput BOMInputStream to detect byte order marks.
745+ * @param piInput BOMInputStream to guess XML encoding.
746+ * @param lenient indicates if the charset encoding detection should be relaxed.
747+ * @return the encoding to be used.
748+ * @throws IOException thrown if there is a problem reading the stream.
749+ */
750+ private String toEncoding (final BOMInputStream bomInput , final BOMInputStream piInput , final boolean lenient ) throws IOException {
751+ final String bomEnc = bomInput .getBOMCharsetName ();
752+ final String xmlGuessEnc = piInput .getBOMCharsetName ();
753+ final String xmlEnc = getXmlProlog (piInput , xmlGuessEnc );
754+ try {
755+ return toRawEncoding (bomEnc , xmlGuessEnc , xmlEnc );
756+ } catch (final XmlStreamReaderException ex ) {
757+ if (lenient ) {
758+ return toEncodingLenient (null , ex );
759+ }
760+ throw ex ;
761+ }
762+ }
763+
764+ /**
765+ * Processes an HTTP stream.
766+ *
767+ * @param bomInput BOMInputStream to detect byte order marks.
768+ * @param piInput BOMInputStream to guess XML encoding.
769+ * @param lenient indicates if the charset encoding detection should be relaxed.
770+ * @param httpContentType The HTTP content type.
771+ * @return the encoding to be used.
772+ * @throws IOException thrown if there is a problem reading the stream.
773+ */
774+ private String toEncoding (final BOMInputStream bomInput , final BOMInputStream piInput , final boolean lenient , final String httpContentType )
775+ throws IOException {
776+ final String bomEnc = bomInput .getBOMCharsetName ();
777+ final String xmlGuessEnc = piInput .getBOMCharsetName ();
778+ final String xmlEnc = getXmlProlog (piInput , xmlGuessEnc );
779+ try {
780+ return toHttpEncoding (bomEnc , xmlGuessEnc , xmlEnc , lenient , httpContentType );
781+ } catch (final XmlStreamReaderException ex ) {
782+ if (lenient ) {
783+ return toEncodingLenient (httpContentType , ex );
784+ }
785+ throw ex ;
786+ }
787+ }
788+
789+ /**
790+ * Detects the encoding in lenient mode.
791+ *
792+ * @param httpContentType content-type header to use for the resolution of the charset encoding.
793+ * @param ex The thrown exception.
794+ * @return the encoding.
795+ * @throws IOException thrown if there is a problem reading the stream.
796+ */
797+ private String toEncodingLenient (String httpContentType , XmlStreamReaderException ex ) throws IOException {
798+ if (httpContentType != null && httpContentType .startsWith ("text/html" )) {
799+ httpContentType = httpContentType .substring ("text/html" .length ());
800+ httpContentType = "text/xml" + httpContentType ;
801+ try {
802+ return toHttpEncoding (ex .getBomEncoding (), ex .getXmlGuessEncoding (), ex .getXmlEncoding (), true , httpContentType );
803+ } catch (final XmlStreamReaderException ex2 ) {
804+ ex = ex2 ;
805+ }
806+ }
807+ String encoding = ex .getXmlEncoding ();
808+ if (encoding == null ) {
809+ encoding = ex .getContentTypeEncoding ();
810+ }
811+ if (encoding == null ) {
812+ encoding = defaultEncoding == null ? UTF_8 : defaultEncoding ;
813+ }
814+ return encoding ;
815+ }
816+
696817 /**
697818 * Calculates the HTTP encoding.
698819 *
@@ -704,34 +825,29 @@ public XmlStreamReader(final URLConnection urlConnection, final String defaultEn
704825 * @return the HTTP encoding.
705826 * @throws IOException thrown if there is a problem reading the stream.
706827 */
707- String calculateHttpEncoding (final String bomEnc , final String xmlGuessEnc , final String xmlEnc , final boolean lenient , final String httpContentType )
828+ String toHttpEncoding (final String bomEnc , final String xmlGuessEnc , final String xmlEnc , final boolean lenient , final String httpContentType )
708829 throws IOException {
709-
710830 // Lenient and has XML encoding
711831 if (lenient && xmlEnc != null ) {
712832 return xmlEnc ;
713833 }
714-
715834 // Determine mime/encoding content types from HTTP Content Type
716835 final String cTMime = getContentTypeMime (httpContentType );
717836 final String cTEnc = getContentTypeEncoding (httpContentType );
718837 final boolean appXml = isAppXml (cTMime );
719838 final boolean textXml = isTextXml (cTMime );
720-
721839 // Mime type NOT "application/xml" or "text/xml"
722840 if (!appXml && !textXml ) {
723841 final String msg = MessageFormat .format (HTTP_EX_3 , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
724842 throw new XmlStreamReaderException (msg , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
725843 }
726-
727844 // No content type encoding
728845 if (cTEnc == null ) {
729846 if (appXml ) {
730- return calculateRawEncoding (bomEnc , xmlGuessEnc , xmlEnc );
847+ return toRawEncoding (bomEnc , xmlGuessEnc , xmlEnc );
731848 }
732849 return defaultEncoding == null ? US_ASCII : defaultEncoding ;
733850 }
734-
735851 // UTF-16BE or UTF-16LE content type encoding
736852 if (cTEnc .equals (UTF_16BE ) || cTEnc .equals (UTF_16LE )) {
737853 if (bomEnc != null ) {
@@ -740,7 +856,6 @@ String calculateHttpEncoding(final String bomEnc, final String xmlGuessEnc, fina
740856 }
741857 return cTEnc ;
742858 }
743-
744859 // UTF-16 content type encoding
745860 if (cTEnc .equals (UTF_16 )) {
746861 if (bomEnc != null && bomEnc .startsWith (UTF_16 )) {
@@ -749,7 +864,6 @@ String calculateHttpEncoding(final String bomEnc, final String xmlGuessEnc, fina
749864 final String msg = MessageFormat .format (HTTP_EX_2 , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
750865 throw new XmlStreamReaderException (msg , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
751866 }
752-
753867 // UTF-32BE or UTF-32LE content type encoding
754868 if (cTEnc .equals (UTF_32BE ) || cTEnc .equals (UTF_32LE )) {
755869 if (bomEnc != null ) {
@@ -758,7 +872,6 @@ String calculateHttpEncoding(final String bomEnc, final String xmlGuessEnc, fina
758872 }
759873 return cTEnc ;
760874 }
761-
762875 // UTF-32 content type encoding
763876 if (cTEnc .equals (UTF_32 )) {
764877 if (bomEnc != null && bomEnc .startsWith (UTF_32 )) {
@@ -767,7 +880,6 @@ String calculateHttpEncoding(final String bomEnc, final String xmlGuessEnc, fina
767880 final String msg = MessageFormat .format (HTTP_EX_2 , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
768881 throw new XmlStreamReaderException (msg , cTMime , cTEnc , bomEnc , xmlGuessEnc , xmlEnc );
769882 }
770-
771883 return cTEnc ;
772884 }
773885
@@ -780,7 +892,7 @@ String calculateHttpEncoding(final String bomEnc, final String xmlGuessEnc, fina
780892 * @return the raw encoding.
781893 * @throws IOException thrown if there is a problem reading the stream.
782894 */
783- String calculateRawEncoding (final String bomEnc , final String xmlGuessEnc , final String xmlEnc ) throws IOException {
895+ String toRawEncoding (final String bomEnc , final String xmlGuessEnc , final String xmlEnc ) throws IOException {
784896
785897 // BOM is Null
786898 if (bomEnc == null ) {
@@ -833,125 +945,4 @@ String calculateRawEncoding(final String bomEnc, final String xmlGuessEnc, final
833945 throw new XmlStreamReaderException (msg , bomEnc , xmlGuessEnc , xmlEnc );
834946 }
835947
836- /**
837- * Closes the XmlStreamReader stream.
838- *
839- * @throws IOException thrown if there was a problem closing the stream.
840- */
841- @ Override
842- public void close () throws IOException {
843- reader .close ();
844- }
845-
846- /**
847- * Does lenient detection.
848- *
849- * @param httpContentType content-type header to use for the resolution of the charset encoding.
850- * @param ex The thrown exception.
851- * @return the encoding.
852- * @throws IOException thrown if there is a problem reading the stream.
853- */
854- private String doLenientDetection (String httpContentType , XmlStreamReaderException ex ) throws IOException {
855- if (httpContentType != null && httpContentType .startsWith ("text/html" )) {
856- httpContentType = httpContentType .substring ("text/html" .length ());
857- httpContentType = "text/xml" + httpContentType ;
858- try {
859- return calculateHttpEncoding (ex .getBomEncoding (), ex .getXmlGuessEncoding (), ex .getXmlEncoding (), true , httpContentType );
860- } catch (final XmlStreamReaderException ex2 ) {
861- ex = ex2 ;
862- }
863- }
864- String encoding = ex .getXmlEncoding ();
865- if (encoding == null ) {
866- encoding = ex .getContentTypeEncoding ();
867- }
868- if (encoding == null ) {
869- encoding = defaultEncoding == null ? UTF_8 : defaultEncoding ;
870- }
871- return encoding ;
872- }
873-
874- /**
875- * Gets the default encoding to use if none is set in HTTP content-type, XML prolog and the rules based on content-type are not adequate.
876- * <p>
877- * If it is {@code null} the content-type based rules are used.
878- * </p>
879- *
880- * @return the default encoding to use.
881- */
882- public String getDefaultEncoding () {
883- return defaultEncoding ;
884- }
885-
886- /**
887- * Gets the charset encoding of the XmlStreamReader.
888- *
889- * @return charset encoding.
890- */
891- public String getEncoding () {
892- return encoding ;
893- }
894-
895- /**
896- * Process the raw stream.
897- *
898- * @param bomInput BOMInputStream to detect byte order marks.
899- * @param piInput BOMInputStream to guess XML encoding.
900- * @param lenient indicates if the charset encoding detection should be relaxed.
901- * @return the encoding to be used.
902- * @throws IOException thrown if there is a problem reading the stream.
903- */
904- private String processHttpStream (final BOMInputStream bomInput , final BOMInputStream piInput , final boolean lenient ) throws IOException {
905- final String bomEnc = bomInput .getBOMCharsetName ();
906- final String xmlGuessEnc = piInput .getBOMCharsetName ();
907- final String xmlEnc = getXmlProlog (piInput , xmlGuessEnc );
908- try {
909- return calculateRawEncoding (bomEnc , xmlGuessEnc , xmlEnc );
910- } catch (final XmlStreamReaderException ex ) {
911- if (lenient ) {
912- return doLenientDetection (null , ex );
913- }
914- throw ex ;
915- }
916- }
917-
918- /**
919- * Processes an HTTP stream.
920- *
921- * @param bomInput BOMInputStream to detect byte order marks.
922- * @param piInput BOMInputStream to guess XML encoding.
923- * @param lenient indicates if the charset encoding detection should be relaxed.
924- * @param httpContentType The HTTP content type.
925- * @return the encoding to be used.
926- * @throws IOException thrown if there is a problem reading the stream.
927- */
928- private String processHttpStream (final BOMInputStream bomInput , final BOMInputStream piInput , final boolean lenient , final String httpContentType )
929- throws IOException {
930- final String bomEnc = bomInput .getBOMCharsetName ();
931- final String xmlGuessEnc = piInput .getBOMCharsetName ();
932- final String xmlEnc = getXmlProlog (piInput , xmlGuessEnc );
933- try {
934- return calculateHttpEncoding (bomEnc , xmlGuessEnc , xmlEnc , lenient , httpContentType );
935- } catch (final XmlStreamReaderException ex ) {
936- if (lenient ) {
937- return doLenientDetection (httpContentType , ex );
938- }
939- throw ex ;
940- }
941- }
942-
943- /**
944- * Reads the underlying reader's {@code read(char[], int, int)} method.
945- *
946- * @param buf the buffer to read the characters into.
947- * @param offset The start offset.
948- * @param len The number of bytes to read.
949- * @return the number of characters read or -1 if the end of stream.
950- * @throws IOException if an I/O error occurs.
951- */
952- @ Override
953- public int read (final char [] buf , final int offset , final int len ) throws IOException {
954- return reader .read (buf , offset , len );
955- }
956-
957948}
0 commit comments