/*
 * Huffman coding demo.
 *
 * Provenance: recovered from a git patch (commit
 * c8efffad57c80893bbcc130d0f46d4e499543218, author liangjinglin,
 * Thu, 23 Jan 2025 13:32:09 +0800, subject "2025-01-22 Huffman coding",
 * originally written in Chinese) that created
 * src/dataANDcalc/java/tree/HuffmanCode.java.
 *
 * NOTE(review): the patch declared "package tree;". The declaration is left
 * out here so the class compiles standalone; re-add it when the file is placed
 * back under the tree/ source directory.
 */
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Builds a Huffman tree for a sentence, derives a per-character code table,
 * packs the encoded bit string into bytes and decodes it back.
 *
 * <p>Characters are handled as UTF-16 {@code char} units, each stored as a
 * one-character {@code String} key. Instances are mutable and not thread-safe.
 */
public class HuffmanCode {

    /**
     * Message printed when a traversal is handed a null node (Chinese for
     * "the current node is empty"). Written with Unicode escapes so the source
     * file stays ASCII-only and compiles under any default source encoding.
     */
    private static final String EMPTY_NODE_MESSAGE = "\u5f53\u524d\u8282\u70b9\u4e3a\u7a7a";

    /** Root of the Huffman tree; {@code null} until built or when the sentence is empty. */
    private HuffmanNode huffTree;

    /** Working list of not-yet-merged nodes used while building the tree. */
    private final List<HuffmanNode> nodeArrs;

    /** Code table: one-character string -> its bit string of '0'/'1' characters. */
    private final Map<String, String> codeMap;

    /** The text being encoded. */
    private final String sentence;

    /**
     * Exact number of bits produced by the last {@link #zip()} call. Needed to
     * decode the final byte, which may carry fewer than eight meaningful bits.
     */
    private int encodedBitLength;

    /**
     * @param sentence the text to encode; must not be null
     * @throws NullPointerException if {@code sentence} is null
     */
    public HuffmanCode(String sentence) {
        this.nodeArrs = new ArrayList<>();
        this.codeMap = new HashMap<>();
        this.sentence = Objects.requireNonNull(sentence, "sentence");
    }

    /**
     * Runs the whole pipeline: builds the tree, rebuilds the code table and
     * packs the encoded sentence into bytes.
     *
     * @return the packed Huffman encoding of the sentence (empty for an empty sentence)
     */
    public byte[] encode() {
        createHuffTree();
        codeMap.clear();
        if (huffTree != null) {
            createHuffCode(huffTree, "", "");
        }
        return zip();
    }

    /**
     * Decodes bytes produced by the most recent {@link #encode()} call on this
     * instance (the code table and bit length are taken from that call).
     *
     * @param huffCodeBytes the packed encoding
     * @return the decoded text
     * @throws IllegalArgumentException if the byte count does not match the last
     *         encoding or the bits do not end on a complete code
     */
    public String decode(byte[] huffCodeBytes) {
        return unzip(huffCodeBytes);
    }

    /**
     * Walks the tree and records the code of every leaf in {@link #codeMap}.
     *
     * @param huffmanNode node being visited
     * @param code        the bit ("0" for a left edge, "1" for a right edge, "" for the
     *                    root) that led to this node
     * @param sb          the path accumulated so far; deliberately an immutable
     *                    {@code String} rather than a shared mutable buffer, so each
     *                    recursion branch extends its own copy
     */
    private void createHuffCode(HuffmanNode huffmanNode, String code, String sb) {
        if (huffmanNode == null) {
            System.out.println(EMPTY_NODE_MESSAGE);
            return;
        }
        String path = sb + code;

        boolean isLeaf = huffmanNode.left == null && huffmanNode.right == null;
        if (isLeaf) {
            // A tree with a single leaf has an empty root path; give that symbol
            // the one-bit code "0" so the sentence does not encode to zero bits.
            codeMap.put(huffmanNode.str, path.isEmpty() ? "0" : path);
            return;
        }
        createHuffCode(huffmanNode.left, "0", path);
        createHuffCode(huffmanNode.right, "1", path);
    }

    /**
     * Concatenates the code of every character of the sentence.
     *
     * @return the encoded sentence as a string of '0'/'1' characters
     * @throws IllegalStateException if the code table lacks a character of the sentence
     */
    private String transToHuffCode() {
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < sentence.length(); i++) {
            String symbol = String.valueOf(sentence.charAt(i));
            String symbolCode = codeMap.get(symbol);
            if (symbolCode == null) {
                throw new IllegalStateException("no Huffman code for '" + symbol
                        + "'; build the tree and code table first");
            }
            bits.append(symbolCode);
        }
        return bits.toString();
    }

    /**
     * Packs the encoded bit string into bytes, eight bits per byte; the final
     * byte holds the remaining bits (possibly fewer than eight) in its low-order
     * positions. The exact bit count is remembered in {@link #encodedBitLength}.
     *
     * <p>Bytes are two's-complement values, so the chunk {@code 10101000} is
     * stored as -88: subtract one to get {@code 10100111}, invert the magnitude
     * bits to get {@code 11011000}, i.e. sign bit set with magnitude 88.
     *
     * @return the packed bytes
     */
    private byte[] zip() {
        String huffCode = transToHuffCode();
        encodedBitLength = huffCode.length();
        byte[] huffCodeBytes = new byte[(huffCode.length() + 7) / 8];
        for (int index = 0; index < huffCodeBytes.length; index++) {
            int start = index * 8;
            int end = Math.min(start + 8, huffCode.length());
            huffCodeBytes[index] = (byte) Integer.parseInt(huffCode.substring(start, end), 2);
        }
        return huffCodeBytes;
    }

    /**
     * Renders the lowest {@code bitCount} bits of a byte as a zero-padded string.
     *
     * @param b        the byte to render
     * @param bitCount how many low-order bits to return, 0 to 8
     * @return exactly {@code bitCount} characters of '0'/'1'
     */
    private static String byteToStr(byte b, int bitCount) {
        // Masking removes the sign extension; OR-ing in bit 8 forces a ninth
        // digit so toBinaryString always yields 9 characters with the leading
        // zeros of the low byte preserved.
        String nineBits = Integer.toBinaryString((b & 0xFF) | 0x100);
        return nineBits.substring(nineBits.length() - bitCount);
    }

    /**
     * Expands packed bytes back into the bit string and decodes it with the
     * reversed code table.
     *
     * @param huffCodeBytes bytes produced by the last {@link #zip()} call
     * @return the decoded text
     * @throws IllegalArgumentException if the byte count does not match the recorded
     *         bit length or the bits end in the middle of a code
     */
    private String unzip(byte[] huffCodeBytes) {
        int expectedBytes = (encodedBitLength + 7) / 8;
        if (huffCodeBytes.length != expectedBytes) {
            throw new IllegalArgumentException("expected " + expectedBytes
                    + " byte(s) but got " + huffCodeBytes.length);
        }

        StringBuilder huffCode = new StringBuilder(encodedBitLength);
        for (int i = 0; i < huffCodeBytes.length; i++) {
            boolean isLast = i == huffCodeBytes.length - 1;
            // Only the final byte may hold a partial chunk.
            int bitCount = isLast ? encodedBitLength - 8 * i : 8;
            huffCode.append(byteToStr(huffCodeBytes[i], bitCount));
        }

        // Invert the code table: bit string -> character.
        Map<String, String> reverseMap = new HashMap<>();
        for (Map.Entry<String, String> entry : codeMap.entrySet()) {
            reverseMap.put(entry.getValue(), entry.getKey());
        }

        StringBuilder result = new StringBuilder();
        StringBuilder code = new StringBuilder();
        for (int i = 0; i < huffCode.length(); i++) {
            code.append(huffCode.charAt(i));
            String symbol = reverseMap.get(code.toString());
            if (symbol != null) {
                result.append(symbol);
                code.setLength(0);
            }
        }
        if (code.length() != 0) {
            throw new IllegalArgumentException(
                    "trailing bits '" + code + "' do not form a complete code");
        }
        return result.toString();
    }

    /**
     * Builds the Huffman tree from the character counts of the sentence by
     * repeatedly merging the two least frequent nodes. Leaves {@link #huffTree}
     * null when the sentence is empty.
     */
    private void createHuffTree() {
        nodeArrs.clear(); // allow the tree to be rebuilt without stale nodes
        for (Map.Entry<String, Integer> entry : analysis().entrySet()) {
            nodeArrs.add(new HuffmanNode(entry.getValue(), entry.getKey()));
        }
        while (nodeArrs.size() > 1) {
            Collections.sort(nodeArrs);
            HuffmanNode left = nodeArrs.remove(0);
            HuffmanNode right = nodeArrs.remove(0);
            HuffmanNode root = new HuffmanNode(left.counts + right.counts, null);
            root.left = left;
            root.right = right;
            nodeArrs.add(root);
        }
        huffTree = nodeArrs.isEmpty() ? null : nodeArrs.get(0);
    }

    /**
     * Counts how often each character occurs in the sentence.
     *
     * @return map from one-character string to its number of occurrences
     */
    private HashMap<String, Integer> analysis() {
        HashMap<String, Integer> counts = new HashMap<>();
        for (int i = 0; i < sentence.length(); i++) {
            counts.merge(String.valueOf(sentence.charAt(i)), 1, Integer::sum);
        }
        return counts;
    }

    /**
     * Prints the tree in pre-order (node, then left subtree, then right subtree).
     *
     * @param huffmanNode root of the subtree to print
     */
    private void forwardIterator(HuffmanNode huffmanNode) {
        if (huffmanNode == null) {
            System.out.println(EMPTY_NODE_MESSAGE);
            return;
        }
        System.out.println(huffmanNode.toString());

        if (huffmanNode.left != null) {
            forwardIterator(huffmanNode.left);
        }
        if (huffmanNode.right != null) {
            forwardIterator(huffmanNode.right);
        }
    }

    /** Tree node: a leaf carries a character, an internal node carries {@code null}. */
    public static class HuffmanNode implements Comparable<HuffmanNode> {
        /** Occurrence count (for internal nodes: the sum of both children). */
        private int counts;

        /** The one-character string of a leaf, or {@code null} for an internal node. */
        private String str;

        private HuffmanNode left;

        private HuffmanNode right;

        public HuffmanNode(int counts, String str) {
            this.counts = counts;
            this.str = str;
        }

        /** Orders nodes by ascending count. */
        @Override
        public int compareTo(HuffmanNode o) {
            // Integer.compare cannot overflow, unlike subtracting the counts.
            return Integer.compare(this.counts, o.counts);
        }

        @Override
        public String toString() {
            return "HuffmanNode{"
                    + "counts=" + counts
                    + ", str='" + str + '\''
                    + '}';
        }
    }

    /** Demo: prints the code table of a sample sentence and its round-tripped text. */
    public static void main(String[] args) {
        HuffmanCode huffmanCode = new HuffmanCode("i like like like java do you like a java");
        huffmanCode.createHuffTree();
        huffmanCode.createHuffCode(huffmanCode.huffTree, "", "");
        System.out.println(huffmanCode.codeMap);
        System.out.println(huffmanCode.unzip(huffmanCode.zip()));
    }
}