- 注册时间
- 2004-8-28
- 最后登录
- 1970-1-1
|
发表于 2008-5-25 19:57:33
|
显示全部楼层
恶劣工作环境代码
用于从非文本内容中过滤出连续的、类似文字的内容。
这样他要用就得自己改了吧, jason兄~~
{ TMainFrm.RipThreadExecute

  Thread body that scans the file named in RipInputEdt for runs of
  consecutive "text-like" bytes and writes one record per accepted run to the
  file named in RipOutputEdt.

  A run is built from:
    * single bytes accepted by IsEng (IsAscii, or IsAsciiOrHalfWidthKatakana
      when RipJIS is checked), and
    * two-byte sequences whose lead byte is in $81..$FE and which are accepted
      by the selected double-byte classifier (IsBIG5 / IsGBK / IsGB2312 /
      IsJIS).

  Each accepted run is emitted as
      <start offset, Int2Hex(...,8)>,<source byte count, Int2Hex(...,8)>,<text>CRLF
  where TAB / CR / LF inside the text are written as the two-character
  sequences \t, \r and \n.  NOTE(review): a literal backslash in the input is
  not escaped, so the escaped form is ambiguous for such input.

  A run is accepted when it is long enough for its category:
    mixed (both single- and double-byte) : RipMix checked, length >= MHLimMix
    single-byte only                     : RipEng checked, length >= MHLimEng
    double-byte only                     : RipHan checked, length >= MHLimHan
  Lengths are counted in characters, not bytes.

  Sender : the owning thread object (not used by this routine).
  Result : always 0.

  Uses the BufStm, InputFStm, StringStm and OutputFStm variables declared
  outside this routine; every stream created here is released before return,
  including when an exception is raised part-way through. }
function TMainFrm.RipThreadExecute(Sender: PThread): Integer;
const
  BufChunk = 1024; // growth granularity of the run buffer, in bytes
var
  Spos, Ssize, Dripd, CrLenth, CrByte, CrAddon: Int64;
  RunStart, Needed: Int64;
  Position0, Position1, Position2: Integer;
  StartTick: DWORD;
  B1, B2: Byte;
  W1: WORD;
  mRipEng, mRipMix, mRipHan: Boolean;
  mIsHan, Flush, Mixed: Boolean;
  aEng, aHan: Boolean;
  Streamed, HavePair: Boolean;
  LimEng, LimHan, LimMix: Integer;
  RecPre, RecLine: String;
  IsHan: function(B1: Byte; B2: Byte): Boolean;
  ExfHan: function(B1: Byte; B2: Byte): Boolean;
  IsEng: function(B: Byte): Boolean;
begin
  StartTick := GetTickCount();

  // Snapshot the UI settings once so the scan loop works on stable values.
  mRipEng := RipEng.Checked;
  mRipHan := RipHan.Checked;
  mRipMix := RipMix.Checked;
  LimEng := MHLimEng.Position;
  LimHan := MHLimHan.Position;
  LimMix := MHLimMix.Position;
  Streamed := noWholeBuff.Checked;

  // Double-byte classifier used when double-byte text is being ripped.
  // If several boxes are checked the last assignment wins.
  IsHan := nil;
  if RipBig5.Checked then IsHan := IsBIG5;
  if RipGBK.Checked then IsHan := IsGBK;
  if RipGB2312.Checked then IsHan := IsGB2312;
  if RipJIS.Checked then IsHan := IsJIS;

  // Double-byte classifier used when only single-byte text is being ripped:
  // pairs it accepts are consumed as a unit and terminate the current run.
  ExfHan := nil;
  if ExfBig5.Checked then ExfHan := IsBIG5;
  if ExfGBK.Checked then ExfHan := IsGBK;
  if ExfGB2312.Checked then ExfHan := IsGB2312;
  if ExfJIS.Checked then ExfHan := IsJIS;

  if RipJIS.Checked then
    IsEng := IsAsciiOrHalfWidthKatakana
  else
    IsEng := IsAscii;

  // Input: either read straight from the file, or load it fully into memory.
  if Streamed then
    BufStm := NewReadFileStream(RipInputEdt.Text)
  else
    BufStm := NewMemoryStream();
  try
    if Streamed then
      Ssize := BufStm.Size
    else
    begin
      InputFStm := NewReadFileStream(RipInputEdt.Text);
      try
        Ssize := InputFStm.Size;
        BufStm.Size := Ssize;
        Stream2Stream(BufStm, InputFStm, Ssize);
      finally
        InputFStm.Free;
      end;
      BufStm.Position := 0;
    end;

    StringStm := NewMemoryStream();
    try
      StringStm.Position := 0;

      Spos := 0;      // bytes consumed from the input so far
      Dripd := 0;     // total source bytes emitted in accepted runs
      CrLenth := 0;   // characters in the current run
      CrByte := 0;    // source bytes in the current run
      CrAddon := 0;   // extra output bytes added by \t \r \n escaping
      RunStart := 0;  // input offset of the first byte of the current run
      aEng := False;
      aHan := False;
      Flush := False;
      Mixed := False;
      // -1 guarantees the first computed percentage is pushed to the UI.
      Position1 := -1;
      Position2 := -1;
      RarProgressBar1.Position1 := 0;
      RarProgressBar1.Position2 := 0;

      OutputFStm := NewWriteFileStream(RipOutputEdt.Text);
      try
        OutputFStm.Size := 0; // truncate any existing output

        // Strictly "<": an empty input performs no reads and no division by
        // Ssize below.
        while Spos < Ssize do
        begin
          // Make sure the run buffer can take the largest single append
          // (2 bytes).  Size is only ever increased here, so bytes already
          // stored for the current run are never truncated.
          Needed := CrByte + CrAddon + 2;
          if StringStm.Size < Needed then
            StringStm.Size := ((Needed div BufChunk) + 1) * BufChunk;

          BufStm.Read(B1, 1);
          Inc(Spos);

          if IsEng(B1) then
          begin
            if mRipMix or mRipEng then
            begin
              if CrLenth = 0 then
                RunStart := Spos - 1;
              aEng := True;
              if aHan then
              begin
                Mixed := True;
                aHan := False;
              end;
              // Append one single-byte character; control characters are
              // escaped and cost one additional output byte each.
              case B1 of
                $09: begin StringStm.WriteStr('\t'); Inc(CrAddon); end;
                $0D: begin StringStm.WriteStr('\r'); Inc(CrAddon); end;
                $0A: begin StringStm.WriteStr('\n'); Inc(CrAddon); end;
              else
                StringStm.Write(B1, 1);
              end;
              Inc(CrLenth);
              Inc(CrByte);
            end
            else if CrLenth > 0 then
              Flush := True; // single-byte text not wanted: ends the run
          end
          else if B1 in [$81..$FE] then
          begin
            // Possible lead byte of a double-byte character.
            mIsHan := False;
            // Never read a trail byte past the end of the input.
            HavePair := Spos < Ssize;
            if HavePair then
            begin
              BufStm.Read(B2, 1);
              Inc(Spos);
              // An unselected (nil) classifier means "not a character".
              if mRipMix or mRipHan then
              begin
                if Assigned(IsHan) then
                  mIsHan := IsHan(B1, B2);
              end
              else if Assigned(ExfHan) then
                mIsHan := ExfHan(B1, B2);
            end;

            if mIsHan then
            begin
              if mRipMix or mRipHan then
              begin
                if CrLenth = 0 then
                  RunStart := Spos - 2;
                aHan := True;
                if aEng then
                begin
                  Mixed := True;
                  aEng := False;
                end;
                // Append one double-byte character; the low byte of W1 is
                // B1 so that B1 precedes B2 in the stored bytes.
                W1 := (B2 shl 8) + B1;
                StringStm.Write(W1, 2);
                Inc(CrLenth);
                Inc(CrByte, 2);
              end
              else if CrLenth > 0 then
                Flush := True; // excluded double-byte character ends the run
            end
            else
            begin
              // Not a valid pair: push the trail byte back so it is examined
              // on its own in the next iteration.
              if HavePair then
              begin
                BufStm.Seek(-1, spCurrent);
                Dec(Spos);
              end;
              if CrLenth > 0 then
                Flush := True;
            end;
          end
          else if CrLenth > 0 then
            Flush := True; // low control byte, $7F, $80 or $FF ends the run

          // End of input terminates whatever run is pending.
          if Spos >= Ssize then
            Flush := True;

          if Flush then
          begin
            if (Mixed and mRipMix and (CrLenth >= LimMix)) or
               (not (Mixed) and aEng and mRipEng and (CrLenth >= LimEng)) or
               (not (Mixed) and aHan and mRipHan and (CrLenth >= LimHan)) then
            begin
              // RunStart is recorded when the run begins, so the offset is
              // right no matter how many bytes the terminator consumed.
              RecPre := Int2Hex(RunStart, 8) + ',' + Int2Hex(CrByte, 8) + ',';
              OutputFStm.WriteStr(RecPre);
              StringStm.Position := 0;
              SetLength(RecLine, CrByte + CrAddon);
              StringStm.Read(PChar(RecLine)^, Length(RecLine));
              OutputFStm.WriteStr(RecLine);
              Inc(Dripd, CrByte);
              OutputFStm.WriteStr(#$0D#$0A);
            end;
            // Reset all per-run state.
            StringStm.Position := 0;
            aEng := False;
            aHan := False;
            CrLenth := 0;
            CrByte := 0;
            CrAddon := 0;
            Mixed := False;
            Flush := False;
          end;

          // Progress bars: touch the control only when the percentage moves.
          Position0 := Spos * 100 div Ssize;
          if Position1 <> Position0 then
          begin
            Position1 := Position0;
            RarProgressBar1.Position1 := Position1;
          end;
          Position0 := Dripd * 100 div Ssize;
          if Position2 <> Position0 then
          begin
            Position2 := Position0;
            RarProgressBar1.Position2 := Position2;
          end;
        end;
      finally
        OutputFStm.Free;
      end;
    finally
      StringStm.Free;
    end;
  finally
    BufStm.Free;
  end;

  AddLog('Ripping done in ' + Int2Str(GetTickCount() - StartTick) + 'ms');
  Result := 0;
end;
复制代码 |
|