1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
| #Huffman Encoding #Tree-Node Type
import random class Node: def __init__(self,freq): self.left = None self.right = None self.father = None self.freq = freq def isLeft(self): return self.father.left == self #create nodes创建叶子节点 def createNodes(freqs): return [Node(freq) for freq in freqs]
#create Huffman-Tree创建Huffman树 def createHuffmanTree(nodes): queue = nodes[:] print(queue) #一个个node的地址 #每次对queue进行排序, while len(queue) > 1: queue.sort(key=lambda item:item.freq) #reverse = false node_left = queue.pop(0) node_right = queue.pop(0)
node_father = Node(node_left.freq + node_right.freq)
node_father.left = node_left node_father.right = node_right node_left.father = node_father node_right.father = node_father queue.append(node_father) queue[0].father = None return queue[0] #Huffman编码 def huffmanEncoding(nodes,root): codes = [''] * len(nodes) for i in range(len(nodes)): node_tmp = nodes[i] while node_tmp != root: if node_tmp.isLeft(): codes[i] = '0' + codes[i] else: codes[i] = '1' + codes[i] node_tmp = node_tmp.father return codes def freq_count(strr): chars = [] chars_fre = []
for i in range(len(strr)): if strr[i] in chars: pass else: chars.append(strr[i]) char_fre = (strr[i], strr.count(strr[i])) # print(chars_fre) chars_fre.append(char_fre) return chars_fre
def encoder_huffman(strr,chars_fre,codes): huffmans=''
for word in strr: i = 0 #用于与code【i】还有item 的符号一一对应 for item in chars_fre: if word == item[0]: huffmans += codes[i] i += 1 print(huffmans) return huffmans
def decode_huffman(huffmans,codes,chars_fre): original_code='' while huffmans!='': i=0 for item in codes: if item in huffmans: if huffmans.index(item) ==0: original_code += chars_fre[i][0] huffmans=huffmans[len(item):] i+=1 return original_code
#chars = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N'] #freqs = [10,4,2,5,3,4,2,6,4,4,3,7,9,6]
#元组的列表,item【1】就是后面2,2,3,4,等,生成器然后是作为列表保存,传入createnodes中 # ''''''chars_freqs = [('C', 2), ('G', 2), ('E', 3), ('K', 3), ('B', 4), # ('F', 4), ('I', 4), ('J', 4), ('D', 5), ('H', 6), # ('N', 6), ('L', 7), ('M', 9), ('A', 10)] # '''''' if __name__ =='__main__': #sttttt =input('input your text') #sttttt=input('input your text') sttttt="" sttttt = open('docx复制粘贴出来的文本','r').read() # for i in range(100): # sttttt += str(int(random.randint(27, 70))) # sttttt += random.choice('abcdefghijklmnopqrstuvwxyz!@#$%^&*()') # sttttt=random.sample( # ['z', 'y', 'x', 'w', 'v', 'u', 't', 's', 'r', 'q', 'p', 'o', 'n', 'm', 'l', 'k', 'j', 'i', 'h', 'g', 'f', 'e', # 'd', 'c', 'b', 'a'],20) # print(sttttt) # print('编译前输入的文本: '+sttttt) #chars_freqs=freq_count(sttttt)
#c=input('input your word and freq, splitted with ,') #splitt = c.split(',') #print(splitt) chars_freqs =[] chars_freqs = freq_count(sttttt) print('文本中字符的统计如下:\n'+str(chars_freqs)) # for i in range(0,len(sttttt),2): # chars_freq= (sttttt[i], sttttt[i+1]) # if sttttt[i] in [item[0] for item in chars_freqs]: # print('can not be a huffman code, there is some word repeating in this sentence, the huffman code will be without:',sttttt[i],sttttt[i+1]) # continue # chars_freqs.append(chars_freq) # print(chars_freqs) nodes = createNodes([item[1] for item in chars_freqs]) #print([item[1] for item in chars_freqs]) root = createHuffmanTree(nodes) codes = huffmanEncoding(nodes,root) res = {} for item in zip(chars_freqs,codes): print ('Character:%s freq:%-2d encoding: %s' % (item[0][0],item[0][1],item[1])) res.update({item[1]:item[0][0]}) # huffman_code=encoder_huffman(sttttt,chars_freqs,codes) # print('转换之后的huffman编码 '+huffman_code) # print('解码之后的原来的文本: '+decode_huffman(huffman_code,codes,chars_freqs)) print(res) d2 = open('你outguess提取出来的编码','r').readlines() re = '' for i in d2: re+=res[i[:-1]] # re += res[i] print(re)
|