功能:Lazarus 中一个用于处理 UTF-8 编码字符串的类。
// Creates an instance holding an empty string: no stored bytes, a character
// count of zero, and the malformed-input flag cleared.
constructor TUTF8String.Create;
begin
  FisErrorUTF8String := False;  // nothing has been scanned yet, so no error
  FCharsCount := 0;             // the empty string contains no characters
  FValue := '';
end;
// Returns the bytes of the index-th character (0-based) stored in FValue.
//
// The string is scanned from the start. Each byte that looks like a lead byte
// (anything outside $80..$BF) starts one character whose length is derived
// from that lead byte alone; the following bytes are not validated. Bytes in
// $80..$BF found where a lead byte is expected are skipped and do not count
// as characters.
//
// Result is '' when index is negative or not smaller than the number of
// characters found. If the selected sequence is cut off by the end of the
// string, only the bytes that exist are returned.
function TUTF8String.GetUTF8Char(index:integer):string;
var
  total, offset, seqLen, seen: integer;
  lead: byte;
begin
  Result := '';
  total := Length(FValue);
  offset := 0;  // 0-based byte offset of the byte currently examined
  seen := 0;    // number of characters already passed over

  while offset < total do
  begin
    lead := byte(FValue[offset + 1]);  // strings are indexed from 1

    // Sequence length implied by the lead byte; 0 marks a byte that cannot
    // start a character.
    case lead of
      $00..$7F: seqLen := 1;
      $80..$BF: seqLen := 0;
      $C0..$DF: seqLen := 2;
      $E0..$EF: seqLen := 3;
      $F0..$F7: seqLen := 4;
      $F8..$FB: seqLen := 5;
    else
      seqLen := 6;  // $FC..$FF
    end;

    if seqLen = 0 then
      Inc(offset)  // stray byte: step over it without counting a character
    else
    begin
      if seen = index then
      begin
        Result := Copy(FValue, offset + 1, seqLen);
        Exit;
      end;
      Inc(seen);
      Inc(offset, seqLen);
    end;
  end;
end;
// Stores s in FValue and recomputes FCharsCount and FisErrorUTF8String.
// Nothing happens when s equals the value already stored.
//
// The new value is scanned byte by byte. Every byte outside $80..$BF found at
// a character boundary counts as one character whose length is taken from
// that lead byte alone (the following bytes are not validated).
// FisErrorUTF8String becomes True when
//   * a byte in $80..$BF appears where a lead byte is expected, or
//   * the last sequence claims more bytes than remain in the string.
// A truncated final sequence is still included in FCharsCount.
procedure TUTF8String.SetValue(s:string);
var
  total, offset, seqLen, found: integer;
  lead: byte;
begin
  if FValue = s then
    Exit;

  FValue := s;
  FisErrorUTF8String := False;
  total := Length(FValue);
  offset := 0;  // 0-based byte offset of the byte currently examined
  found := 0;   // characters counted so far

  while offset < total do
  begin
    lead := byte(FValue[offset + 1]);  // strings are indexed from 1

    // Sequence length implied by the lead byte; 0 marks a byte that cannot
    // start a character.
    case lead of
      $00..$7F: seqLen := 1;
      $80..$BF: seqLen := 0;
      $C0..$DF: seqLen := 2;
      $E0..$EF: seqLen := 3;
      $F0..$F7: seqLen := 4;
      $F8..$FB: seqLen := 5;
    else
      seqLen := 6;  // $FC..$FF
    end;

    if seqLen = 0 then
    begin
      // Not a valid lead byte: flag the string and resynchronise one byte on.
      FisErrorUTF8String := True;
      Inc(offset);
    end
    else
    begin
      Inc(found);
      Inc(offset, seqLen);
    end;
  end;

  // Landing exactly on the end is the normal case; overshooting means the
  // final sequence was truncated.
  if offset > total then
    FisErrorUTF8String := True;

  Self.FCharsCount := found;
end;
联系客服