#!/usr/bin/env python # -*- coding: utf-8 -*- import sys def rle_compress(data): """ input : data = ascii list of the data to be compressed output : ascii list of compressed data RLE compresses the data """ current_value = data[0] current_count = 0 result = [] for element in data: if element == current_value: current_count += 1 else: result.append(str(current_count)) result.append(current_value) current_value = element current_count = 1 result.append(str(current_count)) result.append(element) return result def rle_decompress(data): """ input : data = ascii list of the data to be decompressed output : ascii list of decompressed data RLE decompresses the data """ result = [] print data for i in range(len(data)/2): count = data[2*i] element = data[2*i+1] for i in range(count): result.append(element) return result def lzss_compress(data): """ input : data = ascii list of the data to be compressed output : ascii list of compressed data LZSS compresses the data search buffer size = 2^10= 1024 look-ahead buffer size = 2^6= 64 10 bits : backwards adress 6 bits : size + 1 byte added every 8 chunks : bitmap to say if it is a normal element of a compressed TODO : ne coder que les redondances de longueur > 3 """ data_cursor = 0 out_cursor = 0 result = [] s_b_size = 1024 # 2**10 l_a_b_size = 64 # 2**6 bitmap_full = False bit_count = 7 bitmap = 0 size_taken = 0 while data_cursor < len(data): if data_cursor == 0: search_buffer = [] else: search_buffer = data[max(data_cursor-s_b_size,0):max(data_cursor,0)] l_a_b_end = min(len(data),data_cursor+l_a_b_size-1) look_ahead_buffer = data[data_cursor:l_a_b_end] match_not_found = True for k in range(len(look_ahead_buffer),2,-1): tmp1 = look_ahead_buffer[0:k] # for i in range(len(search_buffer)-k+1): myrange = range(len(search_buffer)-k+1) myrange.reverse() for i in myrange: tmp2 = search_buffer[i:i+k] if tmp1 == tmp2: # overlap implementation len_overlap_pattern = len(tmp1) detect_overlap_cursor = 0 len_to_add = 0 # detect if the string matched is at the end of the search buffer if i == len(search_buffer)-k: while detect_overlap_cursor+len_overlap_pattern < len(look_ahead_buffer) \ and look_ahead_buffer[detect_overlap_cursor]==look_ahead_buffer[detect_overlap_cursor+len_overlap_pattern]: len_to_add +=1 detect_overlap_cursor +=1 offset_to_save = len(search_buffer)-i length_to_save = k-3 + len_to_add result.append( str ( offset_to_save & int('0011111111',2) ) ) result.append( str ( (length_to_save << 2) + (offset_to_save >> 8)) ) # stores 'OOOOOOOO' 'LLLLLLOO' match_not_found = False data_cursor += k + len_to_add bit = 1 size_taken += 2 break if not match_not_found: break # if no match found, just copy the byte and set 0 in bitmap if match_not_found: result.append(data[data_cursor]) data_cursor += 1 bit = 0 size_taken += 1 bitmap = ( bitmap|(bit << (7-bit_count)) ) bit_count -= 1 if bit_count == -1: result.insert(-size_taken,str(bitmap)) # the bitmap has to be placed before the bytes it describes size_taken = 0 # bitmap = 0 # reset useful stuff bit_count = 7 # # if some un-described data remains, insert bitmap : if bit_count != 7: result.insert(-size_taken,str(bitmap)) return result def lzss_decompress(data): """ input : data = ascii list of the data to be decompressed output : ascii list of decompressed data LZSS decompresses the data search buffer size = 2^10 = 1024 look-ahead buffer size = 2^6 = 64 TODO """ result=[] data_cursor = 0 while data_cursor < len(data): bitmap = data[data_cursor] data_cursor += 1 for k in range(8): if data_cursor >= len(data): break if (bitmap & (1 << k)) != 0: # coded offset = data[data_cursor] data_cursor += 1 length = data[data_cursor] + 3 # wouldn't be coded if length<3 copy_cursor = len(result)-offset for bob in range(length): result.append(result[copy_cursor+bob]) else: result.append(data[data_cursor]) data_cursor += 1 return result def lzss_compress_ti(data): cdata = lzss_compress(data) length=len(cdata) cdata.insert(0,str(length)) return cdata