“断线重连”是手机游戏的必备功能。除了网络连接不稳定外,部分手机系统为了节省电量,在默认情况下黑屏十几秒后就会断开网络连接。如果没有重连机制,玩家只要短时间不在手机旁(比如去喝杯水),回来后就得退出游戏重新打开,大大影响游戏体验。图是《Unity3D网络游戏实战(第2版)》的范例游戏《铁流的轮印》在断线后会自动重连。它是如何实现的呢?下面一起来看看。
一、原理解析
图展示了服务端断线重连的流程。以3.2节《球球大作战》的服务端架构为例,gateway作为中介连接客户端与内部服务,当客户端断开连接(阶段①)时,只要gateway不去触发下线流程,agent和scene就都不会受到影响。当客户端重新发起连接(阶段②)时,经过校验,如果gateway认为它是合法的连接(阶段③),则会将新连接与agent关联起来,这样便完成了重连。整个重连过程只有gateway参与,服务端系统中的其他服务均不受影响。
二、身份标识
为验证重连客户端的合法性,需给每个玩家生成代表身份标识的密码,如代码中的key变量,gateway会为每个玩家生成身份标识码(读者可以将它设计得更加复杂),并让其随着登录协议返回给客户端(代码略)。在发起重连时,客户端必须将标识码发回给服务端,以验证身份。如果不是重连的客户端,由于无法得知标识码,也就无法以断线的名义冒充身份。下图展示了断线重连的完整流程。
gateway/init.lua
--- Gateway-side player record factory.
-- Builds a new table describing one player as seen by the gateway.
-- Fields of the returned record:
--   player_id       absent (nil) on creation
--   agent           absent (nil) on creation
--   conn            absent (nil) on creation
--   key             random integer in [1, 999999999]; the identity key
--   lost_conn_time  absent (nil) on creation
--   msgcache        empty list for messages that could not be sent
-- @treturn table a fresh record; every call returns a distinct table
function gate_player()
    local record = {}
    record.key = math.random(1, 999999999)
    record.msgcache = {}
    return record
end
除了key,代码中还定义了两个变量,lost_conn_time用于记录最后一次断开连接的时间,msgcache用于缓存服务端未能发出的协议。
三、消息缓存
断线期间服务端可能会向客户端发送消息,由于这些消息不能传达,因此需由gateway缓存起来,待重连后发送,见如下代码。修改gateway向客户端发送消息的远程调用send,可实现如下功能:
如果没有断线,调用s.resp.send_by_fd正常发送消息。
如果在断线期间,将消息存入gplayer.msgcache中。
为避免占用过多内存,在缓存了大于500条的消息后,触发下线逻辑,不允许重连。
gateway/init.lua
--- Remote call: deliver a message to a player's client.
-- Does nothing when the player is unknown to this gateway. When the player
-- has a connection the message goes out through s.resp.send_by_fd; otherwise
-- it is appended to the player's msgcache, and once that cache holds more
-- than 500 entries agentmgr is asked to kick the player.
-- @param source     caller handle (not used by this function)
-- @param player_id  key into the players table
-- @param msg        message to deliver
s.resp.send = function(source, player_id, msg)
    local target = players[player_id]
    if target == nil then
        return
    end

    local link = target.conn
    if link ~= nil then
        -- Connected: send straight away.
        s.resp.send_by_fd(nil, link.fd, msg)
        return
    end

    -- No connection: queue the message for a later reconnect.
    local cache = target.msgcache
    cache[#cache + 1] = msg
    if #cache > 500 then
        skynet.call("agentmgr", "lua", "reqkick", player_id, "gate消息缓存过多")
    end
end
四、处理重连请求
定义如图所示的重连协议reconnect,客户端会发送玩家id和身份标识,服务端会回应重连成功或者失败。
由于reconnect协议由gateway处理,因此在处理消息的process_msg方法中,要做个特殊判断,即如果收到reconnect协议,交由process_reconnect方法处理。
gateway/init.lua
-- Message handling for one client packet.
-- fd indexes the conns table; msgstr is the raw packet string, which
-- str_unpack splits into the command name (cmd) and the field list (msg).
local process_msg = function(fd, msgstr)
local cmd,msg = str_unpack(msgstr)
-- Log every received packet as: recv <fd> [<cmd>] {<comma-joined fields>}
skynet.error("recv "..fd.." ["..cmd.."] {"..table.concat( msg, ",").."}")
-- Connection object registered for this fd
local conn = conns[fd]
-- Non-nil once login has completed, nil otherwise
local player_id = conn.player_id
-- "reconnect" is handled by the gateway itself through process_reconnect
if cmd == "reconnect" then
process_reconnect(fd, msg)
return
end
-- ... (the rest of the handler is elided in the article)
end
process_reconnect
代码展示了断线重连的具体处理方法,它有如下几个要点:
做出严格的条件判断,只有断线的玩家才能接受重连。未登录(if not gplayer为真)、未掉线(if gplayer.conn为真)、身份标识错误(if gplayer.key~=key为真)均不可重连。
绑定新连接(conn)和玩家对象(gplayer)。
回应重连消息{"reconnect",0}。
发送缓存中的消息。
--- Handle a client "reconnect" request received on connection fd.
-- A reconnect is accepted only when all of the following hold:
--   * fd is a registered connection,
--   * the player id in msg[2] has a gateway player record,
--   * that record currently has no connection (the player is offline),
--   * the key in msg[3] equals the record's identity key.
-- On success the connection and the player record are bound together, the
-- success reply is sent, and every message cached while the player was
-- offline is delivered in order. The cache is then reset so the same
-- messages are neither resent after a later reconnect nor counted again
-- toward the cache limit enforced when messages are queued.
-- On failure the reason is only logged; nothing is sent to the client.
-- @param fd   connection id, index into conns
-- @param msg  unpacked protocol fields: {cmd, player_id, key}
local process_reconnect = function(fd, msg)
    local player_id = tonumber(msg[2])
    local key = tonumber(msg[3])

    -- conn
    local conn = conns[fd]
    if not conn then
        skynet.error("reconnect fail,conn not exist")
        return
    end

    -- gplayer
    local gplayer = players[player_id]
    if not gplayer then
        skynet.error("reconnect fail,gplayer not exist")
        return
    end
    if gplayer.conn then
        skynet.error("reconnect fail,conn not break")
        return
    end
    if gplayer.key ~= key then
        skynet.error("reconnect fail,key error")
        return
    end

    -- bind
    gplayer.conn = conn
    conn.player_id = player_id

    -- response
    -- NOTE(review): the reply command is spelled "reconect" (not
    -- "reconnect"). It is left unchanged because clients may match on it;
    -- confirm and fix both sides together.
    s.resp.send_by_fd(nil, fd, {"reconect", 0})

    -- msgcache: detach the cache before sending so it is emptied exactly once.
    local pending = gplayer.msgcache or {}
    gplayer.msgcache = {}
    for _, cached in ipairs(pending) do
        s.resp.send_by_fd(nil, fd, cached)
    end
end
五、断线处理
与之前介绍的“登出流程”不同,当客户端掉线时,gateway不会去触发掉线请求(即向agentmgr请求reqkick)。如代码所示,掉线时仅仅取消玩家对象(gplayer)与旧连接(conn)的关联(即gplayer.conn=nil)。为防止客户端不再发起重连导致的资源占用,程序会开启一个定时器(skynet.timeout),若15秒后依然是掉线状态(if gplayer.conn~=nil为假),则向agentmgr请求下线。
gateway/init.lua
--- Handle the loss of client connection fd.
-- A logged-in player is not kicked immediately. The player record is only
-- detached from the connection, and a 15-second timer is started
-- (skynet.timeout counts in 1/100 s). If the player is still offline when
-- the timer fires, the record is dropped and agentmgr is asked to kick the
-- player. (If agentmgr approves, or decides to kick the player itself, it
-- notifies the gateway after saving data so that the gateway removes the
-- player's conn and gate player objects.)
--
-- Each disconnect stamps gplayer.lost_conn_time, and its timer acts only if
-- that stamp is unchanged. A timer left over from an earlier disconnect
-- therefore cannot kick a player who reconnected and dropped again less
-- than 15 seconds ago. The timer also does nothing if players[player_id] no
-- longer refers to the same record (for example after a fresh login).
-- @param fd  connection id, index into conns
local disconnect = function(fd)
    local c = conns[fd]
    if not c then
        return
    end

    local player_id = c.player_id
    -- Login not finished yet: there is no player state to keep alive.
    if not player_id then
        return
    end

    -- Already in game.
    local gplayer = players[player_id]
    -- Record already removed, or this is not the player's current
    -- connection (it was replaced by a reconnect, or already detached).
    if not gplayer or gplayer.conn ~= c then
        return
    end

    gplayer.conn = nil
    local lost_at = skynet.now()
    gplayer.lost_conn_time = lost_at

    skynet.timeout(15 * 100, function()
        -- The record was removed or replaced in the meantime.
        if players[player_id] ~= gplayer then
            return
        end
        -- The player reconnected in time.
        if gplayer.conn ~= nil then
            return
        end
        -- A later disconnect started its own timer; let that one decide.
        if gplayer.lost_conn_time ~= lost_at then
            return
        end
        players[player_id] = nil
        local reason = "diconnect timeout"
        -- Ask agentmgr to take the player offline.
        skynet.call("agentmgr", "lua", "reqkick", player_id, reason)
    end)
end
六、登录增加key返回
gateway/init.lua
--- Remote call: bind a client connection to its agent.
-- After the login flow completes, login notifies the gateway so that the
-- client connection and the agent are associated with each other.
-- If the connection has already gone away, agentmgr is asked to kick the
-- player and false is returned.
-- @param source     caller handle (not used by this function)
-- @param fd         connection id, index into conns
-- @param player_id  id of the player that logged in
-- @param agent      agent assigned to the player
-- @return true and the player's identity key on success; false otherwise
s.resp.sure_agent = function(source, fd, player_id, agent)
    local link = conns[fd]
    if link then
        local record = gate_player()
        record.player_id = player_id
        record.agent = agent
        record.conn = link
        link.player_id = player_id
        players[player_id] = record
        return true, record.key
    end

    -- The client went offline while the login was still in progress.
    skynet.call("agentmgr", "lua", "reqkick", player_id, "未完成登录即下线")
    return false
end
login/init.lua
--- Login protocol handler.
-- Validates the password, asks agentmgr to allocate an agent, then asks the
-- originating gateway to bind the connection to that agent.
-- Reply layout: {"login", code, ...}, where code 0 means success and 1
-- means failure. The success reply is {"login", 0, player_id, key, text};
-- key is the identity key the client must present when reconnecting.
-- @param fd      client connection id on the gateway
-- @param msg     unpacked protocol fields: {cmd, player_id, password}
-- @param source  the gateway service that forwarded the request
-- @return the reply table to send back to the client
s.client.login = function(fd, msg, source)
    local player_id = tonumber(msg[2])
    local password = tonumber(msg[3])
    local gate = source
    -- Kept local so that each login does not overwrite a global `node`.
    local node = skynet.getenv("node")

    -- Check user name and password.
    if password ~= 123 then
        return {"login", 1, "password error"}
    end

    -- Ask agentmgr to allocate an agent.
    local isok, agent = skynet.call("agentmgr", "lua", "reqlogin", player_id, node, gate)
    if not isok then
        return {"login", 1, "req mgr fail"}
    end

    -- Tell the gateway which agent was allocated.
    local bound, key = skynet.call(gate, "lua", "sure_agent", fd, player_id, agent)
    -- Check the result before touching key: on failure sure_agent returns
    -- only false, and concatenating a nil key would raise instead of
    -- producing the failure reply below.
    if not bound then
        return {"login", 1, "res gate fail"}
    end
    skynet.error("login sure_agent key="..key)

    skynet.error("login success ".. player_id)
    -- Result code 0 at index 2, mirroring the code 1 of the failure replies.
    return {"login", 0, player_id, key, "login success"}
end
七、测试
1、重连
server
root@server-VirtualBox:/home/bbb_fight# sh start.sh 1
[:00000002] LAUNCH snlua bootstrap
[:00000003] LAUNCH snlua launcher
[:00000004] LAUNCH snlua cdummy
[:00000005] LAUNCH harbor 0 4
[:00000006] LAUNCH snlua datacenterd
[:00000007] LAUNCH snlua service_mgr
[:00000008] LAUNCH snlua main
[:00000009] LAUNCH snlua clusterd
[:0000000a] LAUNCH snlua nodemgr nodemgr 0
[:0000000a] [service][start] name=nodemgr id=0
[:0000000b] LAUNCH snlua gate
[:0000000b] Listen on 127.0.0.1:7771
[:0000000c] LAUNCH snlua gateway gateway 1
[:0000000c] [service][start] name=gateway id=1
[:0000000c] gate init port=8001
[:0000000c] Listen socket: 0.0.0.0 8001
[:0000000d] LAUNCH snlua gateway gateway 2
[:0000000d] [service][start] name=gateway id=2
[:0000000d] gate init port=8002
[:0000000d] Listen socket: 0.0.0.0 8002
[:0000000e] LAUNCH snlua login login 1
[:0000000e] [service][start] name=login id=1
[:0000000f] LAUNCH snlua login login 2
[:0000000f] [service][start] name=login id=2
[:00000010] LAUNCH snlua agentmgr agentmgr 0
[:00000010] [service][start] name=agentmgr id=0
[:00000011] LAUNCH snlua scene scene 1001
[:00000011] [service][start] name=scene id=1001
[:00000012] LAUNCH snlua scene scene 1002
[:00000012] [service][start] name=scene id=1002
[:00000013] LAUNCH snlua admin admin 1001
[:00000013] [service][start] name=admin id=1001
[:00000013] admin init listenfd=4
[:00000008] KILL self
[:00000002] KILL self
connect from 127.0.0.1:42378 5
[:0000000c] socket connectted 5
[:0000000c] recv 5 [login] {login,101,123}
[:0000000f] [login][client]login
[:0000000a] [nodemgr] newservice begin name=agent
[:00000014] LAUNCH snlua agent agent 101
[:00000014] [service][start] name=agent id=101
[:00000014] last_day=19885 now_day=19888
[:00000014] kuatian...
[:0000000a] [nodemgr] newservice end name=agent
[:00000010] reqlogin player_id=101 success
[:0000000f] login sure_agent key=629613317
[:0000000f] login success 101
[:0000000c] send 5 [login] {login,0,101,629613317,login success}
[:0000000c] socket close 5
connect from 127.0.0.1:37462 6
[:0000000c] socket connectted 6
[:0000000c] recv 6 [reconnect] {reconnect,101,629613317}
[:0000000c] send 6 [reconect] {reconect,0}
client
server@server-VirtualBox:~$ telnet 0.0.0.0 8001
Trying 0.0.0.0...
Connected to 0.0.0.0.
Escape character is '^]'.
login,101,123
login,0,101,629613317,login success
^]
telnet> q
Connection closed.
server@server-VirtualBox:~$ telnet 0.0.0.0 8001
Trying 0.0.0.0...
Connected to 0.0.0.0.
Escape character is '^]'.
reconnect,101,629613317
reconect,0
2、掉线超时踢下线
agentmgr/init.lua
--- Remote call: take a player offline.
-- Only a known player whose status is STATUS.GAME can be kicked; any other
-- case returns false. Otherwise the player is marked STATUS.LOGOUT, the
-- agent is sent "kick" (waited for) and then "exit", the player's gateway
-- is sent "kick", and the record is removed from players.
-- @param source     caller handle (not used by this function)
-- @param player_id  id of the player to kick
-- @param reason     text written to the log
-- @return true when the kick was carried out, false otherwise
s.resp.reqkick = function(source, player_id, reason)
    local entry = players[player_id]
    if not entry or entry.status ~= STATUS.GAME then
        return false
    end

    local node, agent, gate = entry.node, entry.agent, entry.gate
    entry.status = STATUS.LOGOUT

    -- "kick" is awaited before "exit" and the gateway notice are sent.
    s.call(node, agent, "kick")
    s.send(node, agent, "exit")
    s.send(node, gate, "kick", player_id)

    players[player_id] = nil
    local line = "agentmgr reqkick player_id="..player_id.." reason="..reason
    skynet.error(line)
    return true
end
server
root@server-VirtualBox:/home/bbb_fight# sh start.sh 1
[:00000002] LAUNCH snlua bootstrap
[:00000003] LAUNCH snlua launcher
[:00000004] LAUNCH snlua cdummy
[:00000005] LAUNCH harbor 0 4
[:00000006] LAUNCH snlua datacenterd
[:00000007] LAUNCH snlua service_mgr
[:00000008] LAUNCH snlua main
[:00000009] LAUNCH snlua clusterd
[:0000000a] LAUNCH snlua nodemgr nodemgr 0
[:0000000a] [service][start] name=nodemgr id=0
[:0000000b] LAUNCH snlua gate
[:0000000b] Listen on 127.0.0.1:7771
[:0000000c] LAUNCH snlua gateway gateway 1
[:0000000c] [service][start] name=gateway id=1
[:0000000c] gate init port=8001
[:0000000c] Listen socket: 0.0.0.0 8001
[:0000000d] LAUNCH snlua gateway gateway 2
[:0000000d] [service][start] name=gateway id=2
[:0000000d] gate init port=8002
[:0000000d] Listen socket: 0.0.0.0 8002
[:0000000e] LAUNCH snlua login login 1
[:0000000e] [service][start] name=login id=1
[:0000000f] LAUNCH snlua login login 2
[:0000000f] [service][start] name=login id=2
[:00000010] LAUNCH snlua agentmgr agentmgr 0
[:00000010] [service][start] name=agentmgr id=0
[:00000011] LAUNCH snlua scene scene 1001
[:00000011] [service][start] name=scene id=1001
[:00000012] LAUNCH snlua scene scene 1002
[:00000012] [service][start] name=scene id=1002
[:00000013] LAUNCH snlua admin admin 1001
[:00000013] [service][start] name=admin id=1001
[:00000013] admin init listenfd=4
[:00000008] KILL self
[:00000002] KILL self
connect from 127.0.0.1:39760 5
[:0000000c] socket connectted 5
[:0000000c] recv 5 [login] {login,101,123}
[:0000000e] [login][client]login
[:0000000a] [nodemgr] newservice begin name=agent
[:00000014] LAUNCH snlua agent agent 101
[:00000014] [service][start] name=agent id=101
[:00000014] last_day=19885 now_day=19888
[:00000014] kuatian...
[:0000000a] [nodemgr] newservice end name=agent
[:00000010] reqlogin player_id=101 success
[:0000000e] login sure_agent key=435252744
[:0000000e] login success 101
[:0000000c] send 5 [login] {login,0,101,435252744,login success}
[:0000000c] socket close 5
#15秒后
[:00000010] agentmgr reqkick player_id=101 reason=diconnect timeout
[:00000014] KILL self
client
server@server-VirtualBox:~$ telnet 0.0.0.0 8001
Trying 0.0.0.0...
Connected to 0.0.0.0.
Escape character is '^]'.
login,101,123
login,0,101,435252744,login success
^]
telnet> q
Connection closed.
可以看到,客户端掉线后agent服务(014)并没有退出,而是等待一段时间后(代码设置的定时器)才触发下线流程。
3、消息缓存
gateway/init.lua
--- Handle a client "reconnect" request received on connection fd.
-- A reconnect is accepted only when all of the following hold:
--   * fd is a registered connection,
--   * the player id in msg[2] has a gateway player record,
--   * that record currently has no connection (the player is offline),
--   * the key in msg[3] equals the record's identity key.
-- On success the connection and the player record are bound together, the
-- success reply is sent, and every message cached while the player was
-- offline is delivered in order. The cache is then reset: without that,
-- the same messages would be sent again after the next reconnect and would
-- keep counting toward the cache limit enforced when messages are queued.
-- On failure the reason is only logged; nothing is sent to the client.
-- @param fd   connection id, index into conns
-- @param msg  unpacked protocol fields: {cmd, player_id, key}
local process_reconnect = function(fd, msg)
    local player_id = tonumber(msg[2])
    local key = tonumber(msg[3])

    -- conn
    local conn = conns[fd]
    if not conn then
        skynet.error("reconnect fail,conn not exist")
        return
    end

    -- gplayer
    local gplayer = players[player_id]
    if not gplayer then
        skynet.error("reconnect fail,gplayer not exist")
        return
    end
    if gplayer.conn then
        skynet.error("reconnect fail,conn not break")
        return
    end
    if gplayer.key ~= key then
        skynet.error("reconnect fail,key error")
        return
    end

    -- bind
    gplayer.conn = conn
    conn.player_id = player_id

    -- response
    -- NOTE(review): the reply command is spelled "reconect" (not
    -- "reconnect"). It is left unchanged because clients may match on it;
    -- confirm and fix both sides together.
    s.resp.send_by_fd(nil, fd, {"reconect", 0})

    -- msgcache: detach the cache before sending so it is emptied exactly once.
    local pending = gplayer.msgcache or {}
    gplayer.msgcache = {}
    for _, cached in ipairs(pending) do
        s.resp.send_by_fd(nil, fd, cached)
    end
end
server
root@server-VirtualBox:/home/bbb_fight# sh start.sh 1
[:00000002] LAUNCH snlua bootstrap
[:00000003] LAUNCH snlua launcher
[:00000004] LAUNCH snlua cdummy
[:00000005] LAUNCH harbor 0 4
[:00000006] LAUNCH snlua datacenterd
[:00000007] LAUNCH snlua service_mgr
[:00000008] LAUNCH snlua main
[:00000009] LAUNCH snlua clusterd
[:0000000a] LAUNCH snlua nodemgr nodemgr 0
[:0000000a] [service][start] name=nodemgr id=0
[:0000000b] LAUNCH snlua gate
[:0000000b] Listen on 127.0.0.1:7771
[:0000000c] LAUNCH snlua gateway gateway 1
[:0000000c] [service][start] name=gateway id=1
[:0000000c] gate init port=8001
[:0000000c] Listen socket: 0.0.0.0 8001
[:0000000d] LAUNCH snlua gateway gateway 2
[:0000000d] [service][start] name=gateway id=2
[:0000000d] gate init port=8002
[:0000000d] Listen socket: 0.0.0.0 8002
[:0000000e] LAUNCH snlua login login 1
[:0000000e] [service][start] name=login id=1
[:0000000f] LAUNCH snlua login login 2
[:0000000f] [service][start] name=login id=2
[:00000010] LAUNCH snlua agentmgr agentmgr 0
[:00000010] [service][start] name=agentmgr id=0
[:00000011] LAUNCH snlua scene scene 1001
[:00000011] [service][start] name=scene id=1001
[:00000012] LAUNCH snlua scene scene 1002
[:00000012] [service][start] name=scene id=1002
[:00000013] LAUNCH snlua admin admin 1001
[:00000013] [service][start] name=admin id=1001
[:00000013] admin init listenfd=4
[:00000008] KILL self
[:00000002] KILL self
connect from 127.0.0.1:37784 5
[:0000000c] socket connectted 5
[:0000000c] recv 5 [login] {login,101,123}
[:0000000f] [login][client]login
[:0000000a] [nodemgr] newservice begin name=agent
[:00000014] LAUNCH snlua agent agent 101
[:00000014] [service][start] name=agent id=101
[:00000014] last_day=19885 now_day=19888
[:00000014] kuatian...
[:0000000a] [nodemgr] newservice end name=agent
[:00000010] reqlogin player_id=101 success
[:0000000f] login sure_agent key=763086078
[:0000000f] login success 101
[:0000000c] send 5 [login] {login,0,101,763086078,login success}
[:0000000c] recv 5 [enter] {enter}
[:00000014] random_scene i=node2
[:00000014] random_scene i=node1
[:00000011] scene send log msg=enter,101,16,56,2
[:0000000c] send 5 [enter] {enter,101,16,56,2}
[:0000000c] send 5 [balllist] {balllist,16,56,2}
[:0000000c] send 5 [foodlist] {foodlist,1,45,86,2,80,4}
[:00000011] scene send log msg=addfood,3,79,4
[:0000000c] send 5 [addfood] {addfood,3,79,4}
[:0000000c] socket close 5
[:00000011] scene send log msg=addfood,4,87,78
[:00000011] scene send log msg=addfood,5,93,85
[:00000011] scene send log msg=addfood,6,21,34
connect from 127.0.0.1:43018 6
[:0000000c] socket connectted 6
[:00000011] scene send log msg=addfood,7,91,68
[:00000011] scene send log msg=addfood,8,77,41
[:00000011] scene send log msg=addfood,9,89,22
[:00000011] scene send log msg=addfood,10,34,90
[:0000000c] recv 6 [reconnect] {reconnect,101,763086078}
[:0000000c] send 6 [reconect] {reconect,0}
[:0000000c] send 6 [addfood] {addfood,4,87,78}
[:0000000c] send 6 [addfood] {addfood,5,93,85}
[:0000000c] send 6 [addfood] {addfood,6,21,34}
[:0000000c] send 6 [addfood] {addfood,7,91,68}
[:0000000c] send 6 [addfood] {addfood,8,77,41}
[:0000000c] send 6 [addfood] {addfood,9,89,22}
[:0000000c] send 6 [addfood] {addfood,10,34,90}
[:00000011] scene send log msg=addfood,11,86,21
[:0000000c] send 6 [addfood] {addfood,11,86,21}
[:00000011] scene send log msg=addfood,12,63,33
[:0000000c] send 6 [addfood] {addfood,12,63,33}
client
server@server-VirtualBox:~$ telnet 0.0.0.0 8001
Trying 0.0.0.0...
Connected to 0.0.0.0.
Escape character is '^]'.
login,101,123
login,0,101,763086078,login success
enter
enter,101,16,56,2
balllist,16,56,2
foodlist,1,45,86,2,80,4
addfood,3,79,4
^]
telnet> q
Connection closed.
server@server-VirtualBox:~$ telnet 0.0.0.0 8001
Trying 0.0.0.0...
Connected to 0.0.0.0.
Escape character is '^]'.
reconnect,101,763086078
reconect,0
addfood,4,87,78
addfood,5,93,85
addfood,6,21,34
addfood,7,91,68
addfood,8,77,41
addfood,9,89,22
addfood,10,34,90
addfood,11,86,21
addfood,12,63,33