lava版lzw压缩算法

玄狼剑 发表于 2008-2-2 12:59:14

平台：lava20k或24k#define TABLESIZE 2048//2^MAXBITS字典大小
#define INITBITS 9 //初始位
#define MAXBITS 11//in theory it can be 16 but limited by NC3000 ram最大位
#define HASHSHIFT MAXBITS-8 //用于compress中的hash函数
#define CLEARCODE 256 //清除字典标志
#define TERMINATOR 257 //结束标志
#define FIRSTCODE 258 //第一个代码
#define ESC_KEY 27
#define DECODESIZE 1000 //解压字串缓存大小
//字典树,前后缀都是为了在压缩时防止hash函数冲突用的
struct TABLE
{
int value; //存放CODE
int prefix; //前缀
char append; //后缀
};
struct TABLE table;

long buffer; //input和output的缓存
char bitscounter; //buffer中的位数
int index; //索引
long maxcode; //最大的code
char bitsnum; //当前code的位数
int nextcode; //当前code
int bytesin; //已读取的字节数
char decode; //解压字串缓存
int decounter; //解压字串计数器

char infilename,outfilename;//filepath
char infile,outfile;//filehandle

//返回maxcode
int maxval(char n)
{
return((1<<n)-1);
}

//清空字典
void cleartable()
{
int i;
bitsnum=INITBITS;
nextcode=FIRSTCODE;
maxcode=maxval(bitsnum);
for(i=0;i<TABLESIZE;i++)
table.value=-1;
}

//初始化
void init()
{
infile=outfile=buffer=bitscounter=0;
bytesin=0;
memset(infilename,0,60);
memset(outfilename,0,60);
}

//get the length of a file
long fsize(char fp)
{
long a,b;
a=ftell(fp);
b=fseek(fp,0,2);
fseek(fp,a,0);
return b;
}

//to find a word from the end of a string,return the position in the string
char findword(char &string[],char word)
{
int i;
for(i=strlen(string);i>0;i--)
{
if(string==word)break;
}
return (i-1);
}

//if choose a file return 1 else return 0
char choose()
{
char path;
char i;
memset(path,0,15);
ChDir("/");
for(;;)
{
if(!FileList(path)||!strcmp(path,".."))
{
if(infilename==0)return 0;
ChDir("..");
infilename=0;
continue;
}
strcat(infilename,"/");
strcat(infilename,path);
if(!ChDir(path))break;
}
return 1;
}

//find node in the tree
int findnode(int prefix,char word)
{
int offset;
int pointer;
//通过一个hash函数进行定位
pointer=(word<<HASHSHIFT)^prefix;
//以后的都是为了解决冲突
while(pointer>=TABLESIZE)
pointer=pointer-(TABLESIZE>>1);
if(pointer)
offset=TABLESIZE-pointer;
else
offset=1;
for(;;)
{
if(table.value==-1)
return(pointer);
if(table.prefix==prefix&&table.append==word)
return(pointer);
//if has a colesion
pointer=pointer-(offset--);
if(!offset)
offset=TABLESIZE>>1;
if(pointer<0)
pointer=pointer+TABLESIZE;
}
}

//动态输出
void outputc(char fp,int code)
{
buffer=buffer|code<<(32-bitsnum-bitscounter);
bitscounter=bitscounter+bitsnum;
while(bitscounter>=8)
{
putc(buffer>>24,outfile);
bitscounter=bitscounter-8;
buffer=buffer<<8;
}
}

//由于压缩时是动态输出,解压时也要动态输入
int input(char fp)
{
int value;
while(bitscounter<=24)
{
buffer=buffer|getc(fp)<<(24-bitscounter);
bitscounter=bitscounter+8;
}
value=buffer>>(32-bitsnum);
bitscounter=bitscounter-bitsnum;
buffer=buffer<<bitsnum;
return(value);
}

//解压字串的函数
void decodestring(char &data[],int code)
{
int i=0;
while(code>255)
{
data=table.append;
code=table.prefix;
if(++i>=DECODESIZE)
{
SetScreen(1);
fclose(infile);
fclose(outfile);
printf("out of the decode area");
getchar();
exit(1);
}
}
data=code;
}

//解压
void expand()
{
int character,stringcode; //后缀和前缀
char flag=1; //是否遇到CLEARCODE的标志,置一以后连序读两个字节
long len=0; //原始文件大小,用于检验文件是否正确压缩
char name; //原文件名
memset(name,0,15);
infile=fopen(infilename,"rb");
if(!infile)
{
SetScreen(1);
printf("open file error!\n");
getchar();
return;
}
if(getc(infile)!='l'||getc(infile)!='z'||getc(infile)!='w')
{
SetScreen(1);
printf("This is not a lzw file!");
fclose(infile);
getchar();
return;
}
fread(&len,1,4,infile);//get the length of file
character=getc(infile); //get the name of file
fread(name,1,character,infile);
strcpy(outfilename,infilename);
//change outfile name
strcpy(outfilename+1+findword(outfilename,'/'),name);
outfile=fopen(outfilename,"w");
if(!outfile)
{
fclose(infile);
SetScreen(1);
printf("Create file error!");
getchar();
return;
}
SetScreen(1);
printf("expanding...\n");
nextcode=FIRSTCODE;
bitsnum=INITBITS;
maxcode=maxval(bitsnum);
decounter=0;
while((index=input(infile))!=TERMINATOR) //遇到结束标志就停止
{
if(Inkey()==ESC_KEY)
{
fclose(infile);
fclose(outfile);
DeleteFile(outfilename);
SetScreen(1);
printf("ESC KEY HAS BEEN PUSHED!");
getchar();
return;
}
if(++bytesin==1000) //统计进度
{
putchar('.');
bytesin=0;
}
if(index==CLEARCODE) //如果遇到清除标志的处理
{
flag=1;
nextcode=FIRSTCODE;
bitsnum=INITBITS;
maxcode=maxval(bitsnum);
putchar('C');
continue;
}
if(flag) //如果上一个是清除标志,就再读一个字节
{
flag=0;
stringcode=character=index;
putc(character,outfile);
continue;
}
if(index>=nextcode) //读到字典中还没有的数
{
decode=character;
decounter=1;
decodestring(decode,stringcode);
}
else //读到字典中已有的数
{
decounter=0;
decodestring(decode,index);
}
character=decode; //解压后的最后一个字节是后缀
while(decounter>=0) //输出字串
putc(decode,outfile);
//nextcode+1
//因为压缩的关系,nextcode一定是小于字典大小
if(nextcode<=maxcode)
{
table.prefix=stringcode;
table.append=character;
if(nextcode==maxcode&&bitsnum<MAXBITS)
{
putchar('+');
maxcode=maxval(++bitsnum);
}
}
stringcode=index;
}//while

//判断是否成功解压
if(len!=ftell(outfile))
{
fclose(infile);
fclose(outfile);
SetScreen(1);
printf("Did not expand exactly,please check your programme!");
getchar();
return;
}
fclose(infile);
fclose(outfile);
SetScreen(1);
printf("in:%s\nout:%s\nOK",infilename,outfilename);
getchar();
}

//procedure compress
void compress()
{
int stringcode,i; //stringcode前缀
char character,flag=0;//character后缀
long inlen,outlen; //输入输出文件大小
char name;
infile=fopen(infilename,"rb");
if(!infile)
{
SetScreen(1);
printf("open file error!\n");
getchar();
return;
}
strcpy(outfilename,infilename);
strcpy(name,infilename+1+findword(infilename,'/'));//get name of infile
for(i=strlen(outfilename);i>0;i--)
{
if(outfilename=='/')break;
if(outfilename=='.')
{
outfilename=0;
break;
}
}
strcat(outfilename,".lzw");
outfile=fopen(outfilename,"w");
if(!outfile)
{
fclose(infile);
SetScreen(1);
printf("Create file error!");
getchar();
return;
}
inlen=fsize(infile);
fwrite("lzw",1,3,outfile);
fwrite(&inlen,1,4,outfile);
inlen=strlen(name);
fwrite(&inlen,1,1,outfile);
fwrite(name,1,inlen,outfile);
SetScreen(1);
printf("Compressing...\n");
cleartable();
stringcode=getc(infile);
while(!feof(infile))
{
if(Inkey()==ESC_KEY)
{
fclose(infile);
fclose(outfile);
DeleteFile(outfilename);
SetScreen(1);
printf("ESC KEY HAS BEEN PUSHED!");
getchar();
}
character=getc(infile);
//压缩进度
if(++bytesin==1000)
{
putchar('.');
bytesin=0;
}
index=findnode(stringcode,character); //通过hash函数查找节点
/* table.value already exist */
if(table.value!=-1) //如过节点值存在,前缀等于节点值
stringcode=table.value;
/* table.value not exist */
else //如果节点值不存在
{
if(nextcode<=maxcode) //如果没达到上限,就添加进字典
{
table.value=nextcode++;
table.prefix=stringcode;
table.append=character;
}
outputc(outfile,stringcode); //输出前缀
stringcode=character; //前缀等于后缀
if(nextcode>maxcode) //超过上限
{
if(bitsnum<MAXBITS)
{
putchar('+');
maxcode=maxval(++bitsnum);
}
else//bitsnum==MAXBITS
{
outputc(outfile,CLEARCODE);
putchar('C');
cleartable();
}
}
} //else table.value==-1
} //while
outputc(outfile,stringcode);
outputc(outfile,TERMINATOR);
outputc(outfile,0); //这两步不能省,用于把buffer中的数据都输出
outputc(outfile,0);
//compression end
inlen=fsize(infile);
outlen=ftell(outfile);
fclose(infile);
fclose(outfile);
SetScreen(1);
printf("in:%s %d Bytes\nout:%s %d Bytes\nOK",infilename,inlen,outfilename,outlen);
getchar();
}

void main()
{
char key;
for(;;)
{
SetScreen(1);
init();
printf("LZW Compression programme\na.compress\nb.expand\nc.exit");
key=getchar();
if(key=='a')
{
if(!choose())continue;//if not choose a file
compress();
}
else if(key=='b')
{
if(!choose())continue;//if not choose a file
expand();
}
else if(key==ESC_KEY||key=='c')break;
}
}

wanglongwen 发表于 2008-2-2 13:36:31

很实用啊爽爽爽爽希望能够开发个BBK解释器就行了

布网鱼 发表于 2009-5-13 18:38:07

非常强大，我以前怎么就没发现呢？

红色激情 发表于 2009-5-14 14:47:13

布网鱼 发表于 2009-5-15 12:53:04

不是……也就是2008年的东西。
只是……我用了一下，速度太慢了。还是Eastsun那个bin的实用。
可惜没有NC2600版的啊，什么时候移植一下:)

红色激情 发表于 2009-5-15 23:14:29

布网鱼 发表于 2009-5-16 18:39:45

哎，移植问题很大啊。我捣鼓了半天只能显示个界面来:Q

页: [1]

易码技术论坛's Archiver

lava版lzw压缩算法