一、实现原理及步骤
每隔一定时间间隔(比如2分钟),抓取特定笔记ID的全部评论,统计出评论数量及子评论数量,与前一次抓取的数量比较,
如数量增加,说明有新增评论;然后按评论时间对全部评论排序,找出最新添加的评论。
二、技术要点
1、评论接口
https://edith.xiaohongshu.com/api/sns/web/v2/comment/page?note_id=%s&cursor=%s
该接口为GET,需要x-s签名验证。
2、JavaScript毫秒时间戳转Delphi时间(TDateTime)
// Converts a JavaScript-style timestamp (milliseconds since 1970-01-01,
// passed as a decimal string) into a Delphi TDateTime.
//
// Parameters:
//   s - decimal string holding the millisecond count. StrToInt64 raises
//       EConvertError when it is not a valid integer.
// Returns:
//   1970-01-01 08:00:00 plus s milliseconds. The +8h shift is a fixed
//   constant; it is not read from the machine's time-zone settings.
function GetTime_DateTime(s: string): TDateTime;
begin
  // The base instant is built numerically. Parsing a formatted string such as
  // '1970/01/01 08:00:00' only works when the user's locale happens to use a
  // year-month-day short date order.
  Result := IncMilliSecond(EncodeDateTime(1970, 1, 1, 8, 0, 0, 0), StrToInt64(s));
end;
3、根据评论时间排序评论
// TStringList.CustomSort comparer that orders lines by the timestamp stored in
// their first 23 characters.
//
// Parameters:
//   List   - the list being sorted.
//   I1, I2 - indexes of the two lines to compare.
// Returns:
//   -1, 0 or 1 when line I1 is earlier than, equal to, or later than line I2.
// Raises:
//   EConvertError (from StrToDateTime) when a prefix is not a valid timestamp.
//
// NOTE(review): the prefix is assumed to be 'yyyy-mm-dd' followed by a time
// part (23 characters in total). The original code replaced '-' with the
// locale date separator, which implies this layout; confirm against the
// routine that formats the lines.
function TimeCompareInt(List: TStringList; I1, I2: Integer): Integer;
const
  STAMP_LEN = 23;
var
  fs: TFormatSettings;
  stamp1, stamp2: TDateTime;
begin
  // A private settings record pins the date layout, so parsing does not depend
  // on the user's short-date order. The time and decimal separators keep their
  // locale defaults, as before.
  fs := TFormatSettings.Create;
  fs.DateSeparator := '-';
  fs.ShortDateFormat := 'yyyy-mm-dd';

  stamp1 := StrToDateTime(LeftStr(List[I1], STAMP_LEN), fs);
  stamp2 := StrToDateTime(LeftStr(List[I2], STAMP_LEN), fs);

  // CompareDateTime keeps millisecond precision. The previous
  // sign * SecondsBetween form reported 0 for any two stamps less than one
  // second apart and narrowed an Int64 into an Integer.
  Result := CompareDateTime(stamp1, stamp2);
end;
三、评论监测完整源码
unit uCommentCount;
interface
uses
windows,classes,System.Net.URLClient, System.Net.HttpClient, System.Net.HttpClientComponent,
System.SysUtils,strutils,uLog,System.RegularExpressions,uFuncs,system.JSON,uConfig,
NetEncoding,ComObj,ActiveX;
const
// Base value for the custom window-message numbers declared in this unit.
wm_user=$0400;
// Message sent to the form window handle by TCommentCount.working when a run ends;
// wParam is 1 on success and 2 on failure, lParam carries the TCommentCount instance.
wm_data=wm_user+100+1;
// Top-level comment endpoint; the two %s placeholders are the note ID and the paging cursor.
XHS_COMMON_API:string='https://edith.xiaohongshu.com/api/sns/web/v2/comment/page?note_id=%s&cursor=%s';
// Sub-comment endpoint; placeholders are note_id, root_comment_id and cursor, in that order.
XHS_MORE_COMMON_API:string='https://edith.xiaohongshu.com/api/sns/web/v2/comment/sub/page?note_id=%s&root_comment_id=%s&num=10&cursor=%s';
// Value passed as the Referer header on comment requests.
XHS_COMMON_REFER:string='https://www.xiaohongshu.com/';
type
// Pointer to a TCountInfo record.
pCountInfo=^TCountInfo;
// Comment statistics for one note.
TCountInfo=record
Fnote_id:string; // note ID
Fcomment_count:integer; // number of comments
Fsub_comment_count:integer; // number of sub-comments
end;
// Holds the comment statistics recorded for one note: the note ID, the
// comment count and the sub-comment count.
TCountData=class
private
  Fnote_id:string;             // note ID
  Fcomment_count:integer;      // number of comments
  Fsub_comment_count:integer;  // number of sub-comments
  procedure set_note_id(note_id:string);
  procedure set_comment_count(comment_count:integer);
  procedure set_sub_comment_count(sub_comment_count:integer);
public
  property note_id:string read Fnote_id write set_note_id;
  property comment_count:integer read Fcomment_count write set_comment_count;
  property sub_comment_count:integer read Fsub_comment_count write set_sub_comment_count;
  // Creates an instance pre-filled with the three given values.
  constructor Create(note_id:string;comment_count,sub_comment_count:integer);
  // Marked override so that TObject.Free dispatches to this destructor.
  // Without it the declaration only hides the virtual TObject.Destroy and is
  // never reached through Free.
  destructor Destroy; override;
end;
// Worker thread that downloads every comment page of one note, counts the
// comments and sub-comments, and reports the outcome to a form window.
TCommentCount=class(TThread)
private
  FId:cardinal;                               // caller-supplied worker id
  Fnoteid,Ftitle:string;                      // note ID; Ftitle is not assigned in this unit
  Fhasmore,Fcursor:string;                    // Fhasmore: 'true' while more pages remain; Fcursor: cursor of the next page
  Fxs,Fxt:string;                             // X-s / X-t signature values returned by the signing service
  FCountInfo:TCountInfo;                      // comment statistics record
  Fcomment_count,Fsub_comment_count:integer;  // running totals filled in by parseData
  Fcomments:tstringlist;                      // one formatted line per comment
  Fsuccess:boolean;                           // True once every page has been fetched and parsed
  class var Fform: HWND;                      // window that receives wm_data notifications
  class var Fcookie: string;                  // cookie sent with the first comment request
  class procedure SetForm(const hForm: HWND); static;
  class procedure SetCookie(const cookie: string); static;
  procedure parseData(data:string);           // parses one page of comment JSON
  function JsonExist(parent:TJSONObject;child:string):boolean;
  function getSubCommentCount(sub_Comment_cout:string):integer; // sub-comment count as an integer
protected
  procedure Execute; override;
public
  // id: worker id; note_id: the note whose comments are fetched.
  constructor Create(id:cardinal;note_id:string);
  // Marked override so that Free (and FreeOnTerminate) reach this destructor.
  // Without it Free calls only TThread.Destroy and Fcomments is never released.
  destructor Destroy; override;
  property id:cardinal read FId;
  class property form: HWND read Fform write SetForm;
  class property cookie: string read Fcookie write SetCookie;
  property comment_count:integer read Fcomment_count;          // number of comments
  property sub_comment_count:integer read Fsub_comment_count;  // number of sub-comments
  property note_id:string read Fnoteid;
  property success:boolean read Fsuccess;
  procedure working();                                         // full fetch / parse / notify workflow
  //function getDataFromQuery(note_id,cursor:string):string;
  function getXsXt(url:string):boolean;                        // obtains the X-s / X-t signature
  function GetRelativeUrl(url:string):string;                  // path-and-query part of an absolute URL
  function getRequestResult(apiurl:string;Cookie:string;xs,xt,refer:string):string; // performs the signed GET
end;
implementation
// Constructor of the comment-statistics object: stores the note ID and the
// two counters passed by the caller.
constructor TCountData.Create(note_id:string;comment_count,sub_comment_count:integer);
begin
  inherited Create;
  Self.Fsub_comment_count := sub_comment_count;
  Self.Fcomment_count := comment_count;
  Self.Fnote_id := note_id;
end;
// Destructor: nothing of its own to release, so it only chains to the ancestor.
destructor TCountData.Destroy;
begin
  inherited;
end;
// Property setter for note_id.
procedure TCountData.set_note_id(note_id:string);
begin
  Self.Fnote_id := note_id;
end;
// Property setter for comment_count.
procedure TCountData.set_comment_count(comment_count:integer);
begin
  Self.Fcomment_count := comment_count;
end;
// Property setter for sub_comment_count.
procedure TCountData.set_sub_comment_count(sub_comment_count:integer);
begin
  Self.Fsub_comment_count := sub_comment_count;
end;
//----------------------------------------------------------------------------------
// Creates the worker in the suspended state (inherited Create(True)).
//   id      - worker id chosen by the caller
//   note_id - ID of the note whose comments will be fetched
// FreeOnTerminate is not set here.
constructor TCommentCount.Create(id:cardinal;note_id:string);
begin
  inherited Create(True);
  Fcomments := TStringList.Create;
  Fsuccess := False;
  Fhasmore := 'false';
  Fnoteid := note_id;
  FId := id;
end;
// Destructor: runs the ancestor destructor first, then releases the comment list.
destructor TCommentCount.Destroy;
begin
  inherited;
  Fcomments.Free;
end;
// Thread entry point: runs the whole workflow on the worker thread.
procedure TCommentCount.Execute;
begin
  Self.working;
end;
// Main workflow: fetch the first comment page, follow the cursor until the
// server reports no more pages, then sort and save the collected lines and
// notify the form.
//
// On success (every page fetched and parsed):
//   - Fcomments is sorted with TimeCompareInt and written as UTF-8 to
//     <uconfig.workdir>\<note id>.txt
//   - wm_data is sent to Fform with wParam = 1
// On failure wm_data is sent with wParam = 2. In both cases lParam carries
// this instance, and the instance releases itself before the method returns,
// so callers must not use or free it afterwards.
//
// May be called from Execute (worker thread) or directly by the caller.
procedure TCommentCount.working();
const
  MAX_PAGE_FAILURES = 3;    // consecutive failed attempts tolerated per page
  RETRY_DELAY_MS    = 1000; // pause between attempts
var
  js, apiurl, relativeurl: string;
  failures: Integer;
begin
  try
    try
      // ---- first page (empty cursor) ----
      apiurl := Format(XHS_COMMON_API, [Fnoteid, '']);
      relativeurl := GetRelativeUrl(apiurl);
      if not getXsXt(relativeurl) then
      begin
        log('getXsXt=失败');
        Exit;
      end;
      js := getRequestResult(apiurl, Fcookie, Fxs, Fxt, XHS_COMMON_REFER);
      if js = '' then
      begin
        log('js=空');
        Exit;
      end;
      if Pos('成功', js) <= 0 then
      begin
        log(js);
        Exit;
      end;
      log(js);
      parseData(js);

      // ---- remaining pages ----
      failures := 0;
      while Fhasmore = 'true' do
      begin
        // Give up after a bounded number of consecutive failures. The
        // previous code retried immediately and without limit, which could
        // spin forever while the signing or comment service was unreachable.
        if failures >= MAX_PAGE_FAILURES then
        begin
          log('重试次数已达上限');
          Exit;
        end;

        apiurl := Format(XHS_COMMON_API, [Fnoteid, Fcursor]);
        relativeurl := GetRelativeUrl(apiurl);
        if not getXsXt(relativeurl) then
        begin
          log('getXsXt=失败');
          Inc(failures);
          Sleep(RETRY_DELAY_MS);
          Continue;
        end;

        // NOTE(review): the first page uses the class-level Fcookie while the
        // following pages use uConfig.cookie; kept as is - confirm whether
        // both are meant to hold the same value.
        js := getRequestResult(apiurl, uConfig.cookie, Fxs, Fxt, XHS_COMMON_REFER);
        if js = '' then
        begin
          log('js=空');
          Inc(failures);
          Sleep(RETRY_DELAY_MS);
          Continue;
        end;
        if Pos('成功', js) <= 0 then
        begin
          log('js=失败' + js);
          Exit;
        end;
        log(js);
        parseData(js);
        failures := 0;
      end;

      Fsuccess := True;
    finally
      if Fsuccess then
      begin
        Fcomments.Sorted := False;
        Fcomments.CustomSort(TimeCompareInt); // order the lines by comment time
        Fcomments.SaveToFile(uconfig.workdir + '\' + Fnoteid + '.txt', TEncoding.UTF8);
        // LPARAM is pointer-sized; integer(Self) would truncate on Win64.
        SendMessage(Fform, wm_data, 1, LPARAM(Self));
      end
      else
        SendMessage(Fform, wm_data, 2, LPARAM(Self));
    end;
  finally
    // Outer finally: the instance is released even when sorting or saving
    // raised. TThread.Destroy waits for the thread to finish, so calling Free
    // from inside the thread itself would wait on itself. When running on the
    // worker thread, hand destruction to the thread's own termination path.
    if GetCurrentThreadId = ThreadID then
      FreeOnTerminate := True
    else
      Free;
  end;
end;
// Signing service (author's support contact: QQ 39848872, WeChat byc6352).
//
// Requests the X-s / X-t signature for a relative API URL by POSTing a small
// JSON payload to the signing endpoint, and stores the values in Fxs / Fxt.
//
// Parameters:
//   url - relative URL (path and query) to be signed.
// Returns:
//   True when the reply is a JSON object containing both 'X-s' and 'X-t';
//   False when the reply is empty, is not a JSON object, or lacks either key.
//   Fxs / Fxt are modified only when True is returned.
//
// NOTE(review): the payload, including uConfig.a1, is sent to a plain-HTTP
// endpoint with a key embedded in the URL - confirm this is acceptable.
function TCommentCount.getXsXt(url:string):boolean;
const
  args2:string='undefined';
  args3:string='{"url":"%s","data":"%s","a1":"%s"}';
  API_URL:string='http://helpnow.top:8081/xs?key=533278440281';
var
  payload, reply: string;
  parsed, xsValue, xtValue: TJSONValue;
begin
  Result := False;
  payload := Format(args3, [url, args2, uConfig.a1]);
  Log(payload);

  reply := uFuncs.getPostResult(API_URL, payload);
  if reply = '' then
    Exit;
  Log(reply);

  // The parsed value is assigned before the try block. Previously the JSON
  // variable was still uninitialised when the early exits reached the
  // finally clause, so Free could be called on a garbage pointer.
  parsed := TJSONObject.ParseJSONValue(reply);
  try
    if not (parsed is TJSONObject) then
      Exit;
    xsValue := TJSONObject(parsed).GetValue('X-s');
    xtValue := TJSONObject(parsed).GetValue('X-t');
    // Both keys must be present; a missing key used to dereference nil.
    if (xsValue = nil) or (xtValue = nil) then
      Exit;
    Fxs := xsValue.Value;
    Fxt := xtValue.Value;
    Result := True;
  finally
    parsed.Free;
  end;
end;
// Returns the part of an absolute URL that starts at the first '/' found
// after the scheme's '//' (path plus query string). Returns '' when the URL is
// empty, contains no '//', or has no '/' after that point.
function TCommentCount.GetRelativeUrl(url:string):string;
var
  schemePos, slashPos: Integer;
  tail: string;
begin
  Result := '';
  if url = '' then
    Exit;

  schemePos := Pos('//', url);
  if schemePos <= 0 then
    Exit;

  // The tail starts three characters after the position of '//', i.e. at the
  // second character that follows the two slashes.
  tail := Copy(url, schemePos + 3, MaxInt);

  slashPos := Pos('/', tail);
  if slashPos > 0 then
    Result := Copy(tail, slashPos, MaxInt);
end;
// Parses one page of comment JSON.
//
// Reads data.cursor, data.comments and data.has_more from the response. For
// each comment it appends '<time> <user id> <nickname> <content>' to
// Fcomments and adds to the running totals Fcomment_count and
// Fsub_comment_count. Fcursor and Fhasmore are updated for the caller's
// paging loop.
//
// Fhasmore is reset to 'false' on entry and set from 'has_more' only after
// the page has been processed. Any early exit (unparsable JSON, missing
// 'data', 'cursor' or 'comments') therefore ends the paging loop instead of
// leaving a stale 'true' that would make the caller fetch the same page
// forever.
//
// Exceptions raised while reading an individual comment (for example a
// missing field) propagate to the caller.
procedure TCommentCount.parseData(data:string);
var
  root, node: TJSONValue;
  payload, comment, author: TJSONObject;
  comments: TJSONArray;
  idx: Integer;
  createdAt, body, userId, nick, line, subHasMore: string;
begin
  Fhasmore := 'false';

  // Parsed before the try block so the finally clause never sees an
  // uninitialised reference. Free on nil is a no-op.
  root := TJSONObject.ParseJSONValue(data);
  try
    if not (root is TJSONObject) then
      Exit;

    node := TJSONObject(root).GetValue('data');
    if not (node is TJSONObject) then
      Exit;
    payload := TJSONObject(node);

    if not JsonExist(payload, 'cursor') then // paging cursor
      Exit;

    node := payload.GetValue('comments'); // comment list
    if not (node is TJSONArray) then
      Exit;
    comments := TJSONArray(node);

    Fcursor := payload.GetValue('cursor').Value;
    Fcomment_count := Fcomment_count + comments.Count; // comment total

    for idx := 0 to comments.Count - 1 do
    begin
      comment := comments.Items[idx] as TJSONObject;
      createdAt := comment.GetValue('create_time').Value; // creation time (JavaScript timestamp)
      body := comment.GetValue('content').Value;          // comment text
      author := comment.GetValue('user_info') as TJSONObject;
      userId := author.GetValue('user_id').Value;         // commenter ID
      nick := author.GetValue('nickname').Value;          // commenter nickname

      line := uFuncs.getTime(uFuncs.GetTime_DateTime(createdAt)) + ' ' + userId + ' ' + nick + ' ' + body;
      Fcomments.Add(line);

      // NOTE(review): sub_comment_count is added only when
      // sub_comment_has_more is 'true', so comments whose replies are all
      // inlined contribute nothing to the total - confirm this is intended.
      subHasMore := comment.GetValue('sub_comment_has_more').Value;
      if subHasMore = 'true' then
        Fsub_comment_count := Fsub_comment_count +
          getSubCommentCount(comment.GetValue('sub_comment_count').Value); // sub-comment total
    end;

    // Whether more pages remain; set last so it is only 'true' after a
    // fully processed page.
    node := payload.GetValue('has_more');
    if node <> nil then
      Fhasmore := node.Value;
  finally
    root.Free;
  end;
end;
// Converts the textual sub-comment count to an integer after removing any
// double-quote characters. StrToInt raises EConvertError when the remaining
// text is not a valid integer.
function TCommentCount.getSubCommentCount(sub_Comment_cout:string):integer;
begin
  Result := StrToInt(StringReplace(sub_Comment_cout, '"', '', [rfReplaceAll]));
end;
// Reports whether a JSON object has a member with the given name.
//
// Parameters:
//   parent - object to inspect; nil yields False.
//   child  - member name to look for (exact match).
// Returns:
//   True when parent contains a pair named child.
function TCommentCount.JsonExist(parent:TJSONObject;child:string):boolean;
begin
  // GetValue returns nil when no pair has that name. This replaces a manual
  // scan that compared against the quoted ToString form of each key, which
  // would not match keys containing characters that ToString escapes.
  Result := (parent <> nil) and (parent.GetValue(child) <> nil);
end;
// Performs a signed HTTPS GET request and returns the response body.
//
// Parameters:
//   apiurl - absolute URL to request.
//   Cookie - value for the Cookie header.
//   xs, xt - values for the X-s and X-t signature headers.
//   refer  - value for the Referer header.
// Returns:
//   The body decoded as UTF-8 when the status code is 200; '' for any other
//   status or when the request raises (the exception message is logged).
function TCommentCount.getRequestResult(apiurl:string;Cookie:string;xs,xt,refer:string):string;
var
  client: TNetHTTPClient;
  body: TStringStream;
  response: IHTTPResponse;
begin
  Result := '';

  // Each object is created immediately before its own try/finally, so a
  // failing constructor can no longer make the cleanup free an uninitialised
  // reference.
  client := TNetHTTPClient.Create(nil);
  try
    body := TStringStream.Create('', TEncoding.UTF8);
    try
      client.ConnectionTimeout := 30000; // 30 seconds
      client.ResponseTimeout := 30000;   // 30 seconds
      client.AcceptCharSet := 'utf-8';
      client.UserAgent := USER_AGENT;
      client.AllowCookies := True;
      client.HandleRedirects := True;
      client.Accept := 'application/json, text/plain, */*';
      client.ContentType := 'application/json';
      client.AcceptLanguage := 'zh-CN,zh;q=0.9';
      client.CustomHeaders['Cookie'] := Cookie;
      client.CustomHeaders['Referer'] := refer;
      client.CustomHeaders['X-s'] := xs;
      client.CustomHeaders['X-t'] := xt;

      try
        response := client.Get(apiurl, body);
        if response.StatusCode = 200 then
          Result := body.DataString
        else
          // Previously dropped silently; log the status so failures can be diagnosed.
          Log('HTTP ' + IntToStr(response.StatusCode));
      except
        on E: Exception do
          Log(E.Message);
      end;
    finally
      body.Free;
    end;
  finally
    client.Free;
  end;
end;
//------------------------------------------ property accessors -------------------------------------
// Class-level setter: stores the window handle that receives wm_data notifications.
class procedure TCommentCount.SetForm(const hForm: HWND);
begin
  TCommentCount.Fform := hForm;
end;
// Class-level setter: stores the cookie string shared by all workers.
class procedure TCommentCount.SetCookie(const cookie: string);
begin
  TCommentCount.Fcookie := cookie;
end;
end.
四、使用方法:
// Timer handler: for every entry in mCommentCountList, creates a TCommentCount
// worker for that entry's note. When uConfig.DEBUG is set the workflow runs
// synchronously through working; otherwise the worker thread is started.
procedure TFMainForm.Timer1Timer(Sender: TObject);
var
  idx: Integer;
  entry: TCountData;
  worker: TCommentCount;
begin
  memoInfo.Lines.Add(gettime()+' 开始抓取数据...');
  for idx := 0 to mCommentCountList.Count - 1 do
  begin
    entry := mCommentCountList[idx];
    worker := TCommentCount.Create(idx, entry.note_id);
    if uConfig.DEBUG then
      worker.working
    else
      worker.Start;
  end;
end;
五、成品