/**
 * Defines possible Byte Order Marks (BOMs) used in files and maps them to an encoding name.
 *
 * <p>
 * An instance stores up to four BOM bytes together with the number of bytes the mark really
 * consists of. The length takes part in {@link #equals(Object)} and {@link #hashCode()};
 * otherwise a short mark padded with zeros (UTF-16 little endian, {@code FF FE}) would be
 * indistinguishable from a longer one ending in zeros (UTF-32 little endian,
 * {@code FF FE 00 00}).
 *
 * @see <a href="http://en.wikipedia.org/wiki/Byte-order_mark">Byte order mark</a>
 * @author joern.muehlencord
 */
public class BOM {

    /**
     * UTF-32 (Big Endian)
     */
    public static final int[] BOM_UTF32_BE = {0x00, 0x00, 0xFE, 0xFF};
    /**
     * UTF-32 (Little Endian)
     */
    public static final int[] BOM_UTF32_LE = {0xFF, 0xFE, 0x00, 0x00};
    /**
     * UTF-1
     */
    public static final int[] BOM_UTF1 = {0xF7, 0x64, 0x4C};
    /**
     * UTF-7 - version 1
     */
    public static final int[] BOM_UTF7_1 = {0x2B, 0x2F, 0x76, 0x38};
    /**
     * UTF-7 - version 2
     */
    public static final int[] BOM_UTF7_2 = {0x2B, 0x2F, 0x76, 0x39};
    /**
     * UTF-7 - version 3
     */
    public static final int[] BOM_UTF7_3 = {0x2B, 0x2F, 0x76, 0x2B};
    /**
     * UTF-7 - version 4
     */
    public static final int[] BOM_UTF7_4 = {0x2B, 0x2F, 0x76, 0x2F};
    /**
     * UTF-EBCDIC
     */
    public static final int[] BOM_UTF_EBCDIC = {0xDD, 0x73, 0x66, 0x73};
    /**
     * UTF-8
     */
    public static final int[] BOM_UTF8 = {0xEF, 0xBB, 0xBF};
    /**
     * SCSU
     */
    public static final int[] BOM_SCSU = {0x0E, 0xFE, 0xFF};
    /**
     * BOCU-1 - version 1
     */
    public static final int[] BOM_BOCU1_1 = {0xFB, 0xEE, 0x28};
    /**
     * BOCU-1 - version 2 (version 1 followed by 0xFF)
     */
    public static final int[] BOM_BOCU1_2 = {0xFB, 0xEE, 0x28, 0xFF};
    /**
     * UTF-16 - big endian
     */
    public static final int[] BOM_UTF16_BE = {0xFE, 0xFF};
    /**
     * UTF-16 - little endian
     */
    public static final int[] BOM_UTF16_LE = {0xFF, 0xFE};
    /**
     * GB-18030
     */
    public static final int[] BOM_GB_18030 = {0x84, 0x31, 0x95, 0x33};

    /**
     * maximum number of bytes evaluated for a BOM
     */
    private static final int MAX_LENGTH = 4;
    /**
     * mapping from bom bytes to encoding name; built once during class initialization (after the
     * constants above) and never modified afterwards
     */
    private static final Map<BOM, String> BOM_MAP = createBomMap();

    /**
     * number of significant bytes of the BOM (0 to 4)
     */
    private final int length;
    /**
     * key1 of the BOM
     */
    private final int key1;
    /**
     * key2 of the BOM
     */
    private final int key2;
    /**
     * key3 of the BOM
     */
    private final int key3;
    /**
     * key4 of the BOM
     */
    private final int key4;

    /**
     * sets up the bom bytes to encoding name map
     *
     * @return the populated map
     */
    private static Map<BOM, String> createBomMap() {
        final Map<BOM, String> map = new HashMap<>();
        map.put(new BOM(BOM_UTF1), "UTF-1");
        map.put(new BOM(BOM_UTF7_1), "UTF-7");
        map.put(new BOM(BOM_UTF7_2), "UTF-7");
        map.put(new BOM(BOM_UTF7_3), "UTF-7");
        map.put(new BOM(BOM_UTF7_4), "UTF-7");
        map.put(new BOM(BOM_UTF8), "UTF-8");
        map.put(new BOM(BOM_UTF16_BE), "UTF-16_BE");
        map.put(new BOM(BOM_UTF16_LE), "UTF-16_LE");
        map.put(new BOM(BOM_UTF32_BE), "UTF-32_BE");
        map.put(new BOM(BOM_UTF32_LE), "UTF-32_LE");
        map.put(new BOM(BOM_UTF_EBCDIC), "UTF_EBCDIC");
        // was mapped to "UTF-8", which is a different encoding than SCSU
        map.put(new BOM(BOM_SCSU), "SCSU");
        map.put(new BOM(BOM_BOCU1_1), "BOCU-1");
        map.put(new BOM(BOM_BOCU1_2), "BOCU-1");
        map.put(new BOM(BOM_GB_18030), "GB-18030");
        return map;
    }

    /**
     * shared constructor storing the number of significant bytes together with the bytes
     *
     * @param length the number of significant bytes
     * @param k1 the key1
     * @param k2 the key2
     * @param k3 the key3
     * @param k4 the key4
     */
    private BOM(int length, int k1, int k2, int k3, int k4) {
        this.length = length;
        this.key1 = k1;
        this.key2 = k2;
        this.key3 = k3;
        this.key4 = k4;
    }

    /**
     * creates a new instance of a four byte BOM
     *
     * @param k1 the key1
     * @param k2 the key2
     * @param k3 the key3
     * @param k4 the key4
     */
    public BOM(int k1, int k2, int k3, int k4) {
        this(4, k1, k2, k3, k4);
    }

    /**
     * creates a new instance of a three byte BOM
     *
     * @param k1 the key1
     * @param k2 the key2
     * @param k3 the key3
     */
    public BOM(int k1, int k2, int k3) {
        this(3, k1, k2, k3, 0);
    }

    /**
     * creates a new instance of a two byte BOM
     *
     * @param k1 the key1
     * @param k2 the key2
     */
    public BOM(int k1, int k2) {
        this(2, k1, k2, 0, 0);
    }

    /**
     * creates a new instance of a BOM by the given key array
     *
     * @param keys the keys to use; at most the first 4 keys are evaluated, {@code null} is
     * treated like an empty array
     */
    public BOM(final int[] keys) {
        final int count = (keys == null) ? 0 : Math.min(keys.length, MAX_LENGTH);
        this.length = count;
        this.key1 = (count > 0) ? keys[0] : 0;
        this.key2 = (count > 1) ? keys[1] : 0;
        this.key3 = (count > 2) ? keys[2] : 0;
        this.key4 = (count > 3) ? keys[3] : 0;
    }

    /**
     * returns true, if the given object is a BOM of the same length with the same bytes as this
     * object
     *
     * @param o the object to compare to
     * @return true if both objects describe the same byte order mark, false otherwise
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof BOM)) {
            return false;
        }
        final BOM other = (BOM) o;
        return this.length == other.length
                && this.key1 == other.key1
                && this.key2 == other.key2
                && this.key3 == other.key3
                && this.key4 == other.key4;
    }

    /**
     * returns the hashcode for this object, computed from the same values equals compares
     *
     * @return the hashCode for this object
     */
    @Override
    public int hashCode() {
        int hash = 3;
        hash = 53 * hash + this.length;
        hash = 53 * hash + this.key1;
        hash = 53 * hash + this.key2;
        hash = 53 * hash + this.key3;
        hash = 53 * hash + this.key4;
        return hash;
    }

    /**
     * returns the name of the encoding for the given bom bytes
     *
     * @param bomBytes the bytes representing the bom; must be exactly the bytes of the mark, e.g.
     * one of the {@code BOM_*} constants of this class
     * @return the name of the encoding for the given bom bytes
     * @throws UnsupportedEncodingException if no valid encoding name can be found
     */
    public static String getEncoding(final int[] bomBytes) throws UnsupportedEncodingException {
        final String encoding = BOM_MAP.get(new BOM(bomBytes));
        if (encoding == null) {
            throw new UnsupportedEncodingException("Not supported encoding found");
        }
        return encoding;
    }
}
end of file diff --git a/util/src/main/java/de/muehlencord/shared/util/file/BOMStripperInputStream.java b/util/src/main/java/de/muehlencord/shared/util/file/BOMStripperInputStream.java new file mode 100644 index 0000000..2017d93 --- /dev/null +++ b/util/src/main/java/de/muehlencord/shared/util/file/BOMStripperInputStream.java @@ -0,0 +1,76 @@ +package de.muehlencord.shared.util.file; + +import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; + +/** + * Stream which removes the leading BOM from an input stream + * + * @author joern.muehlencord + */ +public class BOMStripperInputStream extends PushbackInputStream { + + /** + * List of all possible BOMS + * + * @see http://en.wikipedia.org/wiki/Byte-order_mark for details + */ + public static final int[][] BOMS = { + BOM.BOM_UTF32_BE, + BOM.BOM_UTF32_LE, + BOM.BOM_UTF1, + BOM.BOM_UTF7_1, + BOM.BOM_UTF7_2, + BOM.BOM_UTF7_3, + BOM.BOM_UTF7_4, + BOM.BOM_UTF_EBCDIC, + BOM.BOM_UTF8, + BOM.BOM_SCSU, + BOM.BOM_BOCU1_1, + BOM.BOM_BOCU1_2, + BOM.BOM_UTF16_BE, + BOM.BOM_UTF16_LE, + BOM.BOM_GB_18030 + }; + + /** + * Returns the length of the bom, if bytes representat a bom - 0 else + * + * @param bom the list of boms to check agains + * @param bytes the bytes to check whether they are a BOM or not + * @return the length of the bom, if bytes representat a bom - 0 else + */ + private static int testForBOM(int[] bom, int[] bytes) { + for (int index = 0; index < bom.length; index++) { + if (bom[index] != bytes[index]) { + return 0; + } + } + return bom.length; + } + + /** + * Creates a new BOMStripperInputStream which removes the leading BOM from an input stream + * + * @param is the input stream to read the data from + * @throws IOException if the BOM cannot be loaded + */ + public BOMStripperInputStream(InputStream is) throws IOException { + super(is, 4); + + final int[] bytes = {read(), read(), read(), read()}; + int count = 0; + for (int[] bom : BOMS) { + count = testForBOM(bom, bytes); + if 
(count != 0) { + break; + } + } + for (int index = bytes.length - 1; index >= count; index--) { + if (bytes[index] != -1) { + unread(bytes[index]); + } + } + } +} diff --git a/util/src/main/java/de/muehlencord/shared/util/file/FileUtil.java b/util/src/main/java/de/muehlencord/shared/util/file/FileUtil.java index 184b11a..db02a83 100644 --- a/util/src/main/java/de/muehlencord/shared/util/file/FileUtil.java +++ b/util/src/main/java/de/muehlencord/shared/util/file/FileUtil.java @@ -1,12 +1,15 @@ package de.muehlencord.shared.util.file; -import static de.muehlencord.shared.util.StringUtil.getStackTraceString; +import java.io.BufferedReader; import java.io.File; import java.io.FileFilter; import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; import java.nio.file.Files; import java.nio.file.Paths; import java.security.DigestInputStream; @@ -85,7 +88,7 @@ public abstract class FileUtil { * @param source the file to copy from * @param destination the destination filename to copy the source file to * @return true, if the vile was copied, false, else - * + * * @deprecated use Files.copy instead */ public static boolean copyFileTo(File source, File destination) { @@ -145,7 +148,7 @@ public abstract class FileUtil { try { md = MessageDigest.getInstance("MD5"); } catch (NoSuchAlgorithmException ex) { - throw new IOException("Cannot get MD5 MessageDigest instance. Reason: "+ex.toString(), ex); + throw new IOException("Cannot get MD5 MessageDigest instance. 
Reason: " + ex.toString(), ex); } try (InputStream is = Files.newInputStream(Paths.get(fileName)); @@ -165,4 +168,38 @@ public abstract class FileUtil { } return sb.toString(); } + + /** + * Reads the content of the given file and returns it as String + * + * @param inputFile the input file to read + * @param encoding the encoding of the file to load + * @return the content of the file as string + * @throws IOException if the file cannot be read + */ + public static String readFile(String inputFile, String encoding) throws IOException { + String message = ""; + + try (FileInputStream fis = new FileInputStream(inputFile); + BOMStripperInputStream bis = new BOMStripperInputStream(fis); + Reader reader = new InputStreamReader(bis, encoding); + BufferedReader in = new BufferedReader(reader, 4096)) { + + boolean isFirstLineRead = true; + StringBuilder s = new StringBuilder(); + while (in.ready()) { + if (!isFirstLineRead) { + s.append("\n"); + } else { + isFirstLineRead = false; + } + s.append(in.readLine()); + } + message = s.toString(); + } catch (FileNotFoundException fnex) { + throw new FileNotFoundException("Error occured while reading file. Reason:" + fnex.getMessage()); + } + + return message; + } }