Я пытался понять Хаффман кода, написанный на Python с помощью Rosetta codePython, понимая Хаффман код 2
Я понимаю, большинство из них и сделал комментарии по некоторым частям:
from heapq import heappush, heappop, heapify
from collections import defaultdict
import collections
txt = "abbaac!ccc" #txt is variable for string which will be encoded
symb2freq = collections.Counter(txt) #counts letter frequency
#and puts in dictionary
def encode(symb2freq): #define a function
heap = [[freq, [symbol, ""]] for symbol, freq in symb2freq.items()] #converts dictionary to a list in order to sort out alphabets in terms of frequencies
heapify(heap) #This sorts out the list so that 1st element is always the
#smallest
while len(heap) > 1:#while there is more than 1 element in the list
left = heappop(heap) #Takes the lowest frequency from the list
print("lo=", left)
right = heappop(heap) #Takes the lowest frequency from the list
for x in left[1:]:
x[1] = '0' + x[1]
for x in right[1:]:
x[1] = '1' + x[1]
add_freq= [left[0] + right[0]] + left[1:] + right[1:] #adds the frequencies and inserts frequency, symbol and alphabet (0 or 1) into one list e.g. [3, ['!', '0'], ['b', '1']]
heappush(heap, add_freq)#inserts add_freq into heap
return sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p)) #What is this doing?
huff = encode(symb2freq)
print ("Symbol\tWeight\tHuffman Code")
for p in huff:
print ("%s\t%s\t%s" % (p[0], symb2freq[p[0]], p[1]))
Существует одна линии что я не понимаю:
return sorted(heappop(heap)[1:], key=lambda p: (len(p[-1]), p)) #What is this doing?
Я изменил его немного, чтобы лучше понять.