#! /usr/bin/env python
import random
import sys
################################################################################
def compress(string):
# Get the unique characters and numeric base.
unique = set(string)
base = len(unique)
# Create a key that will encode data properly.
key = random.sample(unique, base)
mapping = dict(map(reversed, enumerate(key)))
while not mapping[string[-1]]:
key = random.sample(unique, base)
mapping = dict(map(reversed, enumerate(key)))
# Create a compressed numeric representation.
value = 0
for place, char in enumerate(string):
value += mapping[char] * base ** place
# Return the number as a string with the table.
return decode(value), bytes(key)
def decode(value):
# Change a number into a string.
array = bytearray()
while value:
value, byte = divmod(value, 256)
array.append(byte)
return bytes(array)
################################################################################
def decompress(string, mapping):
# Get the numeric value of the string.
value = encode(string)
# Find the numeric base and prepare storage.
base = len(mapping)
data = bytearray()
# Decode the value into the original string.
while value:
value, key = divmod(value, base)
data.append(mapping[key])
# Return the "string" as a bytes object.
return bytes(data)
def encode(array):
# Change a string into a number.
assert array and array[-1], 'Array has ambiguous value!'
value = 0
for shift, byte in enumerate(array):
value += byte << 8 * shift
return value
################################################################################
def test():
# Get this program's source.
txt = open(sys.argv[0], 'r').read().encode()
print('Length of data:', len(txt))
# Compress the source numerically.
data, table = compress(txt)
print('Length after compression:', len(data))
print('Length of the table:', len(table))
print('Total compressed size:', len(data + table))
print('Compression ratio: {:%}'.format(len(data + table) / len(txt)))
# Decompress the data using the table.
new = decompress(data, table)
print('Decompression was {}successful.'.format(('not ', '')[txt == new]))
print('Showing the decompressed data:')
print('==============================')
print(new.decode())
# Test this program if run directly.
if __name__ == '__main__':
test()