2025-01-22 霍夫曼编码
This commit is contained in:
parent
be0d29a010
commit
c8efffad57
226
src/dataANDcalc/java/tree/HuffmanCode.java
Normal file
226
src/dataANDcalc/java/tree/HuffmanCode.java
Normal file
@ -0,0 +1,226 @@
|
||||
package tree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Huffman coding demo for a single sentence.
 *
 * <p>Typical use (see {@link #main(String[])}): build the tree with
 * {@code createHuffTree()}, derive the code table with
 * {@code createHuffCode(root, "", "")}, pack the encoded sentence into bytes with
 * {@code zip()} and turn those bytes back into text with {@code unzip(byte[])}.
 *
 * <p>Symbols are single UTF-16 code units ({@code sentence.charAt(i)}), stored as
 * one-character strings. Instances are mutable and not synchronized.
 */
public class HuffmanCode {

    /** Sentence encoded by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_SENTENCE = "i like like like java do you like a java";

    /** Number of code bits packed into one output byte. */
    private static final int BITS_PER_BYTE = 8;

    /** Root of the Huffman tree; {@code null} until built, or when the sentence is empty. */
    private HuffmanNode huffTree;

    /** Work list used while merging nodes; holds only the root once the tree is built. */
    private final ArrayList<HuffmanNode> nodeArrs;

    /** Code table: one-character symbol -> its bit string made of '0' and '1'. */
    private final Map<String, String> codeMap;

    /** The text being encoded. */
    private final String sentence;

    /**
     * Number of significant bits stored in the final byte of the most recent
     * {@link #zip()} result. The final byte holds a right-aligned, possibly shorter
     * bit group, so this width is needed to restore its leading zeros when decoding.
     */
    private int lastByteBits = BITS_PER_BYTE;

    /**
     * Creates a coder for the given sentence.
     *
     * @param sentence text to encode; must not be {@code null}
     * @throws NullPointerException if {@code sentence} is {@code null}
     */
    public HuffmanCode(String sentence) {
        if (sentence == null) {
            throw new NullPointerException("sentence must not be null");
        }
        this.nodeArrs = new ArrayList<>();
        this.codeMap = new HashMap<>();
        this.sentence = sentence;
    }

    /**
     * Walks the tree in pre-order and stores the code of every leaf in {@code codeMap}.
     * Each visited node is printed to standard output as a trace.
     *
     * @param huffmanNode node to visit; a {@code null} node is reported and skipped
     * @param code        bit contributed by the edge leading to this node
     *                    ("0" for left, "1" for right, "" for the root)
     * @param sb          bits accumulated on the path above this node. This is
     *                    deliberately an immutable {@code String}: a shared mutable
     *                    buffer would leak bits from one branch into its siblings.
     */
    private void createHuffCode(HuffmanNode huffmanNode, String code, String sb) {
        if (huffmanNode == null) {
            System.out.println("当前节点为空");
            return;
        }
        System.out.println(huffmanNode.toString());
        String path = sb + code;

        if (huffmanNode.left == null && huffmanNode.right == null) {
            // A tree made of a single leaf has an empty path; give that lone symbol
            // a one-bit code so that the encoded text is not empty.
            codeMap.put(huffmanNode.str, path.isEmpty() ? "0" : path);
            return;
        }
        if (huffmanNode.left != null) {
            createHuffCode(huffmanNode.left, "0", path);
        }
        if (huffmanNode.right != null) {
            createHuffCode(huffmanNode.right, "1", path);
        }
    }

    /**
     * Replaces every character of the sentence with its Huffman code.
     *
     * @return the concatenated bit string ('0'/'1' characters)
     * @throws IllegalStateException if a character has no entry in the code table,
     *                               i.e. the tree and code table were not built first
     */
    private String transToHuffCode() {
        StringBuilder bits = new StringBuilder();
        for (int i = 0; i < sentence.length(); i++) {
            String symbol = String.valueOf(sentence.charAt(i));
            String symbolCode = codeMap.get(symbol);
            if (symbolCode == null) {
                throw new IllegalStateException(
                        "no Huffman code for '" + symbol + "'; build the tree and code table first");
            }
            bits.append(symbolCode);
        }
        return bits.toString();
    }

    /**
     * Packs the Huffman bit string of the sentence into bytes, 8 bits per byte.
     *
     * <p>Example: the bit group {@code 10101000} is stored as the byte -88, because a
     * byte is interpreted as two's complement: 10101000 - 1 = 10100111 (ones'
     * complement), inverting the value bits gives 11011000, i.e. -88. Positive
     * numbers are stored as-is; a negative number is stored as the inverted
     * magnitude plus one, which lets positive and negative values be added without
     * special handling of the sign bit.
     *
     * <p>When the bit count is not a multiple of 8 the final byte holds the shorter
     * remaining group, right-aligned. Its width is remembered in
     * {@code lastByteBits} for {@link #unzip(byte[])}.
     *
     * @return the packed bytes; an empty array when there are no bits
     */
    private byte[] zip() {
        String huffCode = transToHuffCode();
        int totalBits = huffCode.length();
        int byteCount = (totalBits + BITS_PER_BYTE - 1) / BITS_PER_BYTE;
        byte[] huffCodeBytes = new byte[byteCount];

        for (int index = 0; index < byteCount; index++) {
            int from = index * BITS_PER_BYTE;
            int to = Math.min(from + BITS_PER_BYTE, totalBits);
            // Values 128..255 wrap to negative bytes; the bit pattern is preserved.
            huffCodeBytes[index] = (byte) Integer.parseInt(huffCode.substring(from, to), 2);
        }

        int remainder = totalBits % BITS_PER_BYTE;
        lastByteBits = (remainder == 0) ? BITS_PER_BYTE : remainder;
        return huffCodeBytes;
    }

    /**
     * Renders the low {@code bitCount} bits of a byte as a zero-padded bit string.
     *
     * @param b        byte to render
     * @param bitCount number of bits to produce, 1 to 8
     * @return exactly {@code bitCount} characters of '0'/'1'
     */
    private String byteToStr(byte b, int bitCount) {
        // "& 0xFF" drops the sign extension of negative bytes. OR-ing in a guard bit
        // just above the wanted width forces toBinaryString to emit the leading
        // zeros, e.g. 1 with width 8 -> 1 0000 0001 -> "00000001".
        int guarded = (b & 0xFF) | (1 << bitCount);
        String binaryString = Integer.toBinaryString(guarded);
        return binaryString.substring(binaryString.length() - bitCount);
    }

    /**
     * Decodes bytes produced by {@link #zip()} back into text using the current code table.
     *
     * <p>The width of the final byte is taken from the most recent {@code zip()} call
     * on this instance (8 if {@code zip()} has not been called). Bits left over at
     * the end that do not form a complete code are ignored.
     *
     * @param huffCodeBytes packed Huffman bits; must not be {@code null}
     * @return the decoded text
     */
    private String unzip(byte[] huffCodeBytes) {
        StringBuilder bits = new StringBuilder(huffCodeBytes.length * BITS_PER_BYTE);
        for (int i = 0; i < huffCodeBytes.length; i++) {
            boolean isLast = (i == huffCodeBytes.length - 1);
            bits.append(byteToStr(huffCodeBytes[i], isLast ? lastByteBits : BITS_PER_BYTE));
        }

        // Invert the code table: bit string -> symbol.
        Map<String, String> reverseMap = new HashMap<>();
        for (Map.Entry<String, String> entry : codeMap.entrySet()) {
            reverseMap.put(entry.getValue(), entry.getKey());
        }

        // Huffman codes are prefix-free, so the first table hit while extending the
        // pending bits is always the right symbol.
        StringBuilder pending = new StringBuilder();
        StringBuilder resultSb = new StringBuilder();
        for (int i = 0; i < bits.length(); i++) {
            pending.append(bits.charAt(i));
            String symbol = reverseMap.get(pending.toString());
            if (symbol != null) {
                resultSb.append(symbol);
                pending.setLength(0);
            }
        }
        return resultSb.toString();
    }

    /**
     * Builds the Huffman tree from the character frequencies of the sentence and
     * stores its root in {@code huffTree}. For an empty sentence the root is
     * {@code null}. Any nodes left over from an earlier call are discarded first.
     */
    private void createHuffTree() {
        nodeArrs.clear();
        for (Map.Entry<String, Integer> entry : analysis().entrySet()) {
            nodeArrs.add(new HuffmanNode(entry.getValue(), entry.getKey()));
        }
        if (nodeArrs.isEmpty()) {
            huffTree = null;
            return;
        }
        while (nodeArrs.size() > 1) {
            // Re-sort so that the two lightest nodes sit at the front, then merge them.
            Collections.sort(nodeArrs);
            HuffmanNode left = nodeArrs.remove(0);
            HuffmanNode right = nodeArrs.remove(0);
            HuffmanNode parent = new HuffmanNode(left.counts + right.counts, null);
            parent.left = left;
            parent.right = right;
            nodeArrs.add(parent);
        }
        huffTree = nodeArrs.get(0);
    }

    /**
     * Counts how often each character occurs in the sentence.
     *
     * @return map from one-character string to its number of occurrences
     */
    private HashMap<String, Integer> analysis() {
        HashMap<String, Integer> frequencies = new HashMap<>();
        for (int i = 0; i < sentence.length(); i++) {
            String key = String.valueOf(sentence.charAt(i));
            Integer seen = frequencies.get(key);
            frequencies.put(key, seen == null ? 1 : seen + 1);
        }
        return frequencies;
    }

    /**
     * Prints the subtree rooted at the given node in pre-order (node, left, right).
     *
     * @param huffmanNode subtree root; a {@code null} node is reported and skipped
     */
    private void forwardIterator(HuffmanNode huffmanNode) {
        if (huffmanNode == null) {
            System.out.println("当前节点为空");
            return;
        }
        System.out.println(huffmanNode.toString());

        if (huffmanNode.left != null) {
            forwardIterator(huffmanNode.left);
        }
        if (huffmanNode.right != null) {
            forwardIterator(huffmanNode.right);
        }
    }

    /**
     * Node of the Huffman tree. Leaves carry a symbol; internal nodes have a
     * {@code null} symbol and the summed weight of their children. Nodes are
     * ordered by weight only.
     */
    public static class HuffmanNode implements Comparable<HuffmanNode> {

        /** Weight: occurrence count of the symbol, or the sum of the children's weights. */
        private final int counts;

        /** Symbol of a leaf; {@code null} for internal nodes. */
        private final String str;

        private HuffmanNode left;

        private HuffmanNode right;

        /**
         * @param counts weight of the node
         * @param str    symbol of a leaf, or {@code null} for an internal node
         */
        public HuffmanNode(int counts, String str) {
            this.counts = counts;
            this.str = str;
        }

        /** Orders nodes by ascending weight. */
        @Override
        public int compareTo(HuffmanNode o) {
            // Integer.compare cannot overflow the way "this.counts - o.counts" can.
            return Integer.compare(this.counts, o.counts);
        }

        @Override
        public String toString() {
            return "HuffmanNode{" +
                    "counts=" + counts +
                    ", str='" + str + '\'' +
                    '}';
        }
    }

    /**
     * Demo: encodes a sentence, prints the code table, then prints the sentence
     * recovered from the packed bytes.
     *
     * @param args optional; {@code args[0]} is the sentence to encode. Without
     *             arguments a built-in sample sentence is used.
     */
    public static void main(String[] args) {
        String text = (args != null && args.length > 0) ? args[0] : DEFAULT_SENTENCE;
        HuffmanCode huffmanCode = new HuffmanCode(text);
        huffmanCode.createHuffTree();
        huffmanCode.createHuffCode(huffmanCode.huffTree, "", "");
        System.out.println(huffmanCode.codeMap);
        System.out.println(huffmanCode.unzip(huffmanCode.zip()));
    }
}
|
Loading…
Reference in New Issue
Block a user