Replication and Sharding with MongoDB

Below is a summary of trying it out.

Sharding alone makes availability a concern, so one common remedy is to combine it with replication. As the figure below shows, if even one of the sharded servers goes down, the cluster as a whole is in a failure state.

01.png

So sharding and replication are combined as shown below.
shard_A, sharded to db1, is replicated to db2 and db3.
shard_B, sharded to db2, is replicated to db1 and db3.
shard_C, sharded to db3, is replicated to db1 and db2.
Anticipating further scale-out later, arbiters are included from the start. Since an arbiter holds no data, the expectation is that this layout can tolerate the failure of a single server, as laid out below.
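
Concretely, one replica set member per shard is an arbiter, so the roles work out as follows (matching the replica set commands later in this post):

shard_A: Primary db1, Secondary db2, Arbiter db3
shard_B: Primary db2, Secondary db3, Arbiter db1
shard_C: Primary db3, Secondary db1, Arbiter db2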

02.png

If db2 fails and goes down, the situation becomes the following.

03.png

In shard_A's replica set, db1 simply remains primary.
In shard_B's replica set, the arbiter promotes the shard_B secondary on db3 to primary.
In shard_C's replica set, db3 simply remains primary.

Preparation

This is tried out on EC2 with four servers.
The MongoDB version is 3.2.9.
One core server (core1) and three database servers (db1, db2, db3).
The config server and the mongos run on core1.
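
The port layout used throughout:

core1: mongos on 27000, config server on 27001
db1, db2, db3: shard_A on 27011, shard_B on 27012, shard_C on 27013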

wget http://downloads.mongodb.org/linux/mongodb-linux-x86_64-rhel62-3.2.9.tgz
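
The tarball still has to be unpacked on each of the four servers; a minimal sketch, assuming the extracted tree is renamed to /root/mongodb to match the prompts below:

tar xzf mongodb-linux-x86_64-rhel62-3.2.9.tgz
mv mongodb-linux-x86_64-rhel62-3.2.9 /root/mongodb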

Configure and start the config server (configsvr) and mongos.

[root@core1 mongodb]# cat conf/config.conf 
port = 27001
dbpath = /root/mongodb/data/configdb
fork = true
configsvr = true
logpath = /root/mongodb/logs/config.log
logappend = yes
smallfiles = true

[root@core1 mongodb]# cat conf/mongos.conf                                                                               
port = 27000
configdb = core1:27001
chunkSize = 2
fork = true
logpath = /root/mongodb/logs/shard.log
logappend = yes
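
mongod does not create a missing dbpath (and fork = true needs the log directory to exist), so create both first; a sketch, run from /root/mongodb as in the prompts:

[root@core1 mongodb]# mkdir -p data/configdb logs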

Start them.

[root@core1 mongodb]# ./bin/mongod -f conf/config.conf
[root@core1 mongodb]# ./bin/mongos -f conf/mongos.conf
[root@core1 mongodb]# pgrep -lf mongo
3259 ./bin/mongod -f conf/config.conf
3280 ./bin/mongos -f conf/mongos.conf

Configure and start mongod (db1, db2, db3)

db1

[root@db1 mongodb]# cat conf/rs_shard_A.conf 
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db1 mongodb]# cat conf/rs_shard_B.conf 
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db1 mongodb]# cat conf/rs_shard_C.conf 
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
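
As on core1, the dbpath and logpath directories must exist before starting; a sketch to run from /root/mongodb on each of db1, db2, and db3:

[root@db1 mongodb]# mkdir -p data/rs_shard_A data/rs_shard_B data/rs_shard_C logs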

[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db1 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db1 mongodb]# pgrep -lf mongo
3180 ./bin/mongod -f conf/rs_shard_A.conf
3198 ./bin/mongod -f conf/rs_shard_B.conf
3216 ./bin/mongod -f conf/rs_shard_C.conf

db2

[root@db2 mongodb]# cat conf/rs_shard_A.conf 
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db2 mongodb]# cat conf/rs_shard_B.conf 
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db2 mongodb]# cat conf/rs_shard_C.conf 
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true

[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db2 mongodb]# pgrep -lf mongo
3046 ./bin/mongod -f conf/rs_shard_A.conf
3064 ./bin/mongod -f conf/rs_shard_B.conf
3082 ./bin/mongod -f conf/rs_shard_C.conf

db3

[root@db3 mongodb]# cat conf/rs_shard_A.conf                                                                             
port = 27011
dbpath = /root/mongodb/data/rs_shard_A
fork = true
shardsvr = true
replSet = shard_A
logpath = /root/mongodb/logs/rs_shard_A.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db3 mongodb]# cat conf/rs_shard_B.conf 
port = 27012
dbpath = /root/mongodb/data/rs_shard_B
fork = true
shardsvr = true
replSet = shard_B
logpath = /root/mongodb/logs/rs_shard_B.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true
[root@db3 mongodb]# cat conf/rs_shard_C.conf 
port = 27013
dbpath = /root/mongodb/data/rs_shard_C
fork = true
shardsvr = true
replSet = shard_C
logpath = /root/mongodb/logs/rs_shard_C.log
logappend = yes
smallfiles = true
storageEngine = wiredTiger
nojournal = true

[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf
[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf
[root@db3 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf
[root@db3 mongodb]# pgrep -lf mongo
3120 ./bin/mongod -f conf/rs_shard_A.conf
3138 ./bin/mongod -f conf/rs_shard_B.conf
3156 ./bin/mongod -f conf/rs_shard_C.conf

Configuring the replica sets

Run on db1.

[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
> rs.status()                                                                                                                   
{
        "info" : "run rs.initiate(...) if not yet done for the set",
        "ok" : 0,
        "errmsg" : "no replset config has been received",
        "code" : 94
}
> rs.initiate()                                                                                                                 
{
        "info2" : "no configuration specified. Using a default configuration for the set",
        "me" : "db1:27011",
        "ok" : 1
}
shard_A:OTHER> rs.add("db2:27011")                                                                                              
{ "ok" : 1 }
shard_A:PRIMARY> rs.addArb("db3:27011")                                                                                         
{ "ok" : 1 }
shard_A:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)                                    
{ "ok" : 1 }
shard_A:PRIMARY> rs.status()                                                                                                    
{
        "set" : "shard_A",
        "date" : ISODate("2017-01-29T22:46:24.314Z"),
        "myState" : 1,
        "term" : NumberLong(1),
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db1:27011",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 1381,
                        "optime" : {
                                "ts" : Timestamp(1485729778, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:42:58Z"),
                        "electionTime" : Timestamp(1485729680, 2),
                        "electionDate" : ISODate("2017-01-29T22:41:20Z"),
                        "configVersion" : 4,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "db2:27011",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 243,
                        "optime" : {
                                "ts" : Timestamp(1485729778, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:42:58Z"),
                        "lastHeartbeat" : ISODate("2017-01-29T22:46:22.668Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:46:22.668Z"),
                        "pingMs" : NumberLong(0),
                        "syncingTo" : "db1:27011",
                        "configVersion" : 4
                },
                {
                        "_id" : 2,
                        "name" : "db3:27011",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 235,
                        "lastHeartbeat" : ISODate("2017-01-29T22:46:22.668Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:46:23.611Z"),
                        "pingMs" : NumberLong(0),
                        "configVersion" : 4
                }
        ],
        "ok" : 1
}
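
The rs.reconfig() above raises db1's priority to 2 so it is preferred as primary in elections (the same is done for db2 in shard_B and db3 in shard_C below). To double-check the priorities, something like this works against the primary (a sketch):

shard_A:PRIMARY> rs.conf().members.forEach(function(m){ print(m.host, "priority=" + m.priority) })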

Run on db2.

[root@db2 mongodb]# ./bin/mongo --port 27012                                                                                    
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
> rs.status()                                                                                                                   
{
        "info" : "run rs.initiate(...) if not yet done for the set",
        "ok" : 0,
        "errmsg" : "no replset config has been received",
        "code" : 94
}
> rs.initiate()                                                                                                                 
{
        "info2" : "no configuration specified. Using a default configuration for the set",
        "me" : "db2:27012",
        "ok" : 1
}
shard_B:OTHER> rs.add("db3:27012")                                                                                              
{ "ok" : 1 }
shard_B:PRIMARY> rs.addArb("db1:27012")                                                                                         
{ "ok" : 1 }
shard_B:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)                                    
{ "ok" : 1 }
shard_B:PRIMARY> rs.status()                                                                                                    
{
        "set" : "shard_B",
        "date" : ISODate("2017-01-29T22:45:17.677Z"),
        "myState" : 1,
        "term" : NumberLong(1),
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db2:27012",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 1279,
                        "optime" : {
                                "ts" : Timestamp(1485729913, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:45:13Z"),
                        "infoMessage" : "could not find member to sync from",
                        "electionTime" : Timestamp(1485729859, 2),
                        "electionDate" : ISODate("2017-01-29T22:44:19Z"),
                        "configVersion" : 4,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "db3:27012",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 21,
                        "optime" : {
                                "ts" : Timestamp(1485729913, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:45:13Z"),
                        "lastHeartbeat" : ISODate("2017-01-29T22:45:15.826Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:45:15.831Z"),
                        "pingMs" : NumberLong(0),
                        "syncingTo" : "db2:27012",
                        "configVersion" : 4
                },
                {
                        "_id" : 2,
                        "name" : "db1:27012",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 14,
                        "lastHeartbeat" : ISODate("2017-01-29T22:45:15.826Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:45:13.830Z"),
                        "pingMs" : NumberLong(0),
                        "configVersion" : 4
                }
        ],
        "ok" : 1
}
shard_B:PRIMARY> 

Run on db3.

[root@db3 mongodb]# ./bin/mongo --port 27013                                                                                    
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
> rs.status()                                                                                                                   
{
        "info" : "run rs.initiate(...) if not yet done for the set",
        "ok" : 0,
        "errmsg" : "no replset config has been received",
        "code" : 94
}
> rs.initiate()                                                                                                                 
{
        "info2" : "no configuration specified. Using a default configuration for the set",
        "me" : "db3:27013",
        "ok" : 1
}
shard_C:OTHER> rs.add("db1:27013")                                                                                              
{ "ok" : 1 }
shard_C:PRIMARY> rs.addArb("db2:27013")                                                                                         
{ "ok" : 1 }
shard_C:PRIMARY> var config = rs.config(); config.members[0].priority=2; rs.reconfig(config)                                    
{ "ok" : 1 }
shard_C:PRIMARY> rs.status()                                                                                                    
{
        "set" : "shard_C",
        "date" : ISODate("2017-01-29T22:47:31.366Z"),
        "myState" : 1,
        "term" : NumberLong(1),
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db3:27013",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 1380,
                        "optime" : {
                                "ts" : Timestamp(1485730047, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:47:27Z"),
                        "infoMessage" : "could not find member to sync from",
                        "electionTime" : Timestamp(1485730012, 2),
                        "electionDate" : ISODate("2017-01-29T22:46:52Z"),
                        "configVersion" : 4,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "db1:27013",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 18,
                        "optime" : {
                                "ts" : Timestamp(1485730047, 1),
                                "t" : NumberLong(1)
                        },
                        "optimeDate" : ISODate("2017-01-29T22:47:27Z"),
                        "lastHeartbeat" : ISODate("2017-01-29T22:47:31.233Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:47:31.238Z"),
                        "pingMs" : NumberLong(0),
                        "syncingTo" : "db3:27013",
                        "configVersion" : 4
                },
                {
                        "_id" : 2,
                        "name" : "db2:27013",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 11,
                        "lastHeartbeat" : ISODate("2017-01-29T22:47:31.233Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-29T22:47:27.237Z"),
                        "pingMs" : NumberLong(0),
                        "configVersion" : 4
                }
        ],
        "ok" : 1
}
shard_C:PRIMARY> 

Configuring sharding

Run on core1. Since the arbiters hold no data, they are left out of the shard host lists.

mongos> sh.status()                                                                                                             
--- Sharding Status --- 
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
  shards:
  active mongoses:
        "3.2.9" : 1
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours: 
                No recent migrations
  databases:

mongos> sh.addShard("shard_A/db1:27011,db2:27011")                                                                              
{ "shardAdded" : "shard_A", "ok" : 1 }
mongos> sh.status()                                                                                                             
--- Sharding Status --- 
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
  shards:
        {  "_id" : "shard_A",  "host" : "shard_A/db1:27011,db2:27011" }
  active mongoses:
        "3.2.9" : 1
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours: 
                No recent migrations
  databases:
mongos> sh.addShard("shard_B/db2:27012,db3:27012")                                                                              
{ "shardAdded" : "shard_B", "ok" : 1 }
mongos> sh.status()                                                                                                             
--- Sharding Status --- 
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
  shards:
        {  "_id" : "shard_A",  "host" : "shard_A/db1:27011,db2:27011" }
        {  "_id" : "shard_B",  "host" : "shard_B/db2:27012,db3:27012" }
  active mongoses:
        "3.2.9" : 1
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours: 
                No recent migrations
  databases:
mongos> sh.addShard("shard_C/db3:27013,db1:27013")                                                                              
{ "shardAdded" : "shard_C", "ok" : 1 }
mongos> sh.status()                                                                                                             
--- Sharding Status --- 
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
  shards:
        {  "_id" : "shard_A",  "host" : "shard_A/db1:27011,db2:27011" }
        {  "_id" : "shard_B",  "host" : "shard_B/db2:27012,db3:27012" }
        {  "_id" : "shard_C",  "host" : "shard_C/db1:27013,db3:27013" }
  active mongoses:
        "3.2.9" : 1
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours: 
                No recent migrations
  databases:

Inserting data

Connect to the mongos on core1 and insert data.

[root@core1 mongodb]# ./bin/mongo --port 27000
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27000/test
mongos> use logdb                                                                                                                             
switched to db logdb
mongos> for(var i=1; i<=100000; i++)db.logs.insert({"uid":i, "value":Math.floor(Math.random()*100000+1)})                                     
WriteResult({ "nInserted" : 1 })

Create the index and shard the collection (in 3.2, ensureIndex is a deprecated alias of createIndex, but it still works).

Shortly after sharding completes, checking the status confirms the chunks are spread across shard_A, shard_B, and shard_C. Because mongos.conf set chunkSize = 2 (MB), the 100,000 documents end up split into ten chunks.

mongos> use logdb
switched to db logdb
mongos> db.logs.ensureIndex({uid:1})
{
        "raw" : {
                "shard_B/db2:27012,db3:27012" : {
                        "createdCollectionAutomatically" : false,
                        "numIndexesBefore" : 1,
                        "numIndexesAfter" : 2,
                        "ok" : 1,
                        "$gleStats" : {
                                "lastOpTime" : Timestamp(1485741329, 1),
                                "electionId" : ObjectId("7fffffff0000000000000002")
                        }
                }
        },
        "ok" : 1
}
mongos> sh.enableSharding("logdb")
{ "ok" : 1 }
mongos> sh.shardCollection("logdb.logs", {uid:1})
{ "collectionsharded" : "logdb.logs", "ok" : 1 }

mongos> sh.status()                                                                                                   
--- Sharding Status --- 
  sharding version: {
        "_id" : 1,
        "minCompatibleVersion" : 5,
        "currentVersion" : 6,
        "clusterId" : ObjectId("588dddf2eb2e2130bffee5c6")
}
  shards:
        {  "_id" : "shard_A",  "host" : "shard_A/db1:27011,db2:27011" }
        {  "_id" : "shard_B",  "host" : "shard_B/db2:27012,db3:27012" }
        {  "_id" : "shard_C",  "host" : "shard_C/db1:27013,db3:27013" }
  active mongoses:
        "3.2.9" : 1
  balancer:
        Currently enabled:  yes
        Currently running:  no
        Failed balancer rounds in last 5 attempts:  0
        Migration Results for the last 24 hours: 
                6 : Success
  databases:
        {  "_id" : "logdb",  "primary" : "shard_B",  "partitioned" : true }
                logdb.logs
                        shard key: { "uid" : 1 }
                        unique: false
                        balancing: true
                        chunks:
                                shard_A 3
                                shard_B 4
                                shard_C 3
                        { "uid" : { "$minKey" : 1 } } -->> { "uid" : 10486 } on : shard_A Timestamp(2, 0) 
                        { "uid" : 10486 } -->> { "uid" : 20972 } on : shard_C Timestamp(3, 0) 
                        { "uid" : 20972 } -->> { "uid" : 31458 } on : shard_A Timestamp(4, 0) 
                        { "uid" : 31458 } -->> { "uid" : 41944 } on : shard_C Timestamp(5, 0) 
                        { "uid" : 41944 } -->> { "uid" : 52430 } on : shard_A Timestamp(6, 0) 
                        { "uid" : 52430 } -->> { "uid" : 62916 } on : shard_C Timestamp(7, 0) 
                        { "uid" : 62916 } -->> { "uid" : 73402 } on : shard_B Timestamp(7, 1) 
                        { "uid" : 73402 } -->> { "uid" : 83888 } on : shard_B Timestamp(1, 7) 
                        { "uid" : 83888 } -->> { "uid" : 94374 } on : shard_B Timestamp(1, 8) 
                        { "uid" : 94374 } -->> { "uid" : { "$maxKey" : 1 } } on : shard_B Timestamp(1, 9)

Check the primary of each replica set to confirm the data is there.

[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:PRIMARY> use logdb
switched to db logdb
shard_A:PRIMARY> db.logs.count()
31457


[root@db2 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:PRIMARY> use logdb  
switched to db logdb
shard_B:PRIMARY> db.logs.count()
37085

[root@db3 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:PRIMARY> use logdb
switched to db logdb
shard_C:PRIMARY> db.logs.count()
31458

[root@core1 mongodb]# python -c "print(31457 + 37085 + 31458)"                                                        
100000

Confirm that the secondaries are fine as well

[root@db2 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:SECONDARY> db.getMongo().setSlaveOk()
shard_A:SECONDARY> use logdb
switched to db logdb
shard_A:SECONDARY> db.logs.count()
31457

[root@db1 mongodb]# ./bin/mongo --port 27013
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:SECONDARY> db.getMongo().setSlaveOk()
shard_C:SECONDARY> use logdb
switched to db logdb
shard_C:SECONDARY> db.logs.count()
31458

[root@db3 mongodb]# ./bin/mongo --port 27012
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:SECONDARY> db.getMongo().setSlaveOk()
shard_B:SECONDARY> use logdb
switched to db logdb
shard_B:SECONDARY> db.logs.count()
37085

Simulating a failure on db2

Kill the mongod processes on db2.

[root@db2 mongodb]# pgrep -lf mongo
2572 ./bin/mongod -f conf/rs_shard_A.conf
2642 ./bin/mongod -f conf/rs_shard_B.conf
2712 ./bin/mongod -f conf/rs_shard_C.conf
[root@db2 mongodb]# pkill -f mongo

03.png

Confirm that the cluster keeps running normally in the state shown above.
With sharding alone this would be an outage and MongoDB would become unusable, but thanks to the replica set + sharding layout, shard_B is still served from db3, so everything is still fine.

Status of shard_A

[root@db1 mongodb]# ./bin/mongo --port 27011
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27011/test
shard_A:PRIMARY> rs.status()                                                                                          
{
        "set" : "shard_A",
        "date" : ISODate("2017-01-30T02:29:51.989Z"),
        "myState" : 1,
        "term" : NumberLong(2),
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db1:27011",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 7048,
                        "optime" : {
                                "ts" : Timestamp(1485741566, 280),
                                "t" : NumberLong(2)
                        },
                        "optimeDate" : ISODate("2017-01-30T01:59:26Z"),
                        "electionTime" : Timestamp(1485736504, 1),
                        "electionDate" : ISODate("2017-01-30T00:35:04Z"),
                        "configVersion" : 4,
                        "self" : true
                },
                {
                        "_id" : 1,
                        "name" : "db2:27011",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : {
                                "ts" : Timestamp(0, 0),
                                "t" : NumberLong(-1)
                        },
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2017-01-30T02:29:50.504Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:23:57.064Z"),
                        "pingMs" : NumberLong(0),
                        "lastHeartbeatMessage" : "Connection refused",
                        "configVersion" : -1
                },
                {
                        "_id" : 2,
                        "name" : "db3:27011",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 6867,
                        "lastHeartbeat" : ISODate("2017-01-30T02:29:51.893Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:29:48.614Z"),
                        "pingMs" : NumberLong(0),
                        "configVersion" : 4
                }
        ],
        "ok" : 1
}

Status of shard_B

We can see that the primary has moved to db3.

[root@db1 mongodb]# ./bin/mongo --port 27012                                                                          
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27012/test
shard_B:ARBITER> rs.status()                                                                                          
{
        "set" : "shard_B",
        "date" : ISODate("2017-01-30T02:35:23.035Z"),
        "myState" : 7,
        "term" : NumberLong(3),
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db2:27012",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "optime" : {
                                "ts" : Timestamp(0, 0),
                                "t" : NumberLong(-1)
                        },
                        "optimeDate" : ISODate("1970-01-01T00:00:00Z"),
                        "lastHeartbeat" : ISODate("2017-01-30T02:35:18.606Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:23:57.256Z"),
                        "pingMs" : NumberLong(0),
                        "lastHeartbeatMessage" : "Connection refused",
                        "configVersion" : -1
                },
                {
                        "_id" : 1,
                        "name" : "db3:27012",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 7196,
                        "optime" : {
                                "ts" : Timestamp(1485743048, 1),
                                "t" : NumberLong(3)
                        },
                        "optimeDate" : ISODate("2017-01-30T02:24:08Z"),
                        "lastHeartbeat" : ISODate("2017-01-30T02:35:18.336Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:35:22.271Z"),
                        "pingMs" : NumberLong(0),
                        "electionTime" : Timestamp(1485743047, 1),
                        "electionDate" : ISODate("2017-01-30T02:24:07Z"),
                        "configVersion" : 4
                },
                {
                        "_id" : 2,
                        "name" : "db1:27012",
                        "health" : 1,
                        "state" : 7,
                        "stateStr" : "ARBITER",
                        "uptime" : 7377,
                        "configVersion" : 4,
                        "self" : true
                }
        ],
        "ok" : 1
}

Status of shard_C

[root@db1 mongodb]# ./bin/mongo --port 27013                                                                          
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27013/test
shard_C:SECONDARY> rs.status()                                                                                        
{
        "set" : "shard_C",
        "date" : ISODate("2017-01-30T02:36:48.177Z"),
        "myState" : 2,
        "term" : NumberLong(3),
        "syncingTo" : "db3:27013",
        "heartbeatIntervalMillis" : NumberLong(2000),
        "members" : [
                {
                        "_id" : 0,
                        "name" : "db3:27013",
                        "health" : 1,
                        "state" : 1,
                        "stateStr" : "PRIMARY",
                        "uptime" : 7279,
                        "optime" : {
                                "ts" : Timestamp(1485741598, 78),
                                "t" : NumberLong(3)
                        },
                        "optimeDate" : ISODate("2017-01-30T01:59:58Z"),
                        "lastHeartbeat" : ISODate("2017-01-30T02:36:48.099Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:36:47.471Z"),
                        "pingMs" : NumberLong(0),
                        "electionTime" : Timestamp(1485736537, 1),
                        "electionDate" : ISODate("2017-01-30T00:35:37Z"),
                        "configVersion" : 4
                },
                {
                        "_id" : 1,
                        "name" : "db1:27013",
                        "health" : 1,
                        "state" : 2,
                        "stateStr" : "SECONDARY",
                        "uptime" : 7459,
                        "optime" : {
                                "ts" : Timestamp(1485741598, 78),
                                "t" : NumberLong(3)
                        },
                        "optimeDate" : ISODate("2017-01-30T01:59:58Z"),
                        "syncingTo" : "db3:27013",
                        "configVersion" : 4,
                        "self" : true
                },
                {
                        "_id" : 2,
                        "name" : "db2:27013",
                        "health" : 0,
                        "state" : 8,
                        "stateStr" : "(not reachable/healthy)",
                        "uptime" : 0,
                        "lastHeartbeat" : ISODate("2017-01-30T02:36:46.424Z"),
                        "lastHeartbeatRecv" : ISODate("2017-01-30T02:23:58.153Z"),
                        "pingMs" : NumberLong(0),
                        "lastHeartbeatMessage" : "Connection refused",
                        "configVersion" : -1
                }
        ],
        "ok" : 1
}

Health check from the mongos

Since count() can be inaccurate on a sharded cluster, the count documentation suggests cross-checking with an aggregate query.

[root@core1 mongodb]# ./bin/mongo --port 27000
MongoDB shell version: 3.2.9
connecting to: 127.0.0.1:27000/test
mongos> use logdb                                                                                                     
switched to db logdb
mongos> db.logs.count()                                                                                               
100000
mongos> db.logs.aggregate([{$group:{_id:null, count:{$sum:1}}}])                                                                                   
{ "_id" : null, "count" : 100000 }

Inserting data while the failure persists

Looping inserts through the mongos shell ran into odd problems (it would freeze partway through), so a Python client is used instead.

# Python 3, pymongo 3.4.0
from pymongo import MongoClient
from pprint import pprint


client = MongoClient("core1", 27000)
#print(client.logdb.logs.count())

bulk = client.logdb.logs.initialize_ordered_bulk_op()

for uid in range(100001, 200001):
    bulk.insert({"uid": uid, "value": "hogehoge"})

pprint(bulk.execute())

Running the bulk insert from the Python client as above while the failure was in progress, sometimes everything was inserted and sometimes it was not. The symptom is that the client simply freezes and makes no progress; attaching strace shows no output at all.

Process 4250 attached
recvfrom(5, ^CProcess 4250 detached
 <detached ...>

Bringing the three mongod processes on db2 back up returned things to normal: the freeze cleared and the bulk insert resumed, but a large number of documents had failed to be inserted. The cause has not been found yet. About all a client can do is set timeouts; operationally, recover the downed server promptly when the alert fires.
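
As a sketch of that client-side timeout approach (the values are illustrative; serverSelectionTimeoutMS and socketTimeoutMS are standard pymongo options):

from pymongo import MongoClient
from pymongo.errors import AutoReconnect, ServerSelectionTimeoutError

# Bound how long the driver waits for a usable server and how long any
# single socket operation may block, so a dead member surfaces as an
# exception instead of a silent freeze.
client = MongoClient("core1", 27000,
                     serverSelectionTimeoutMS=5000,
                     socketTimeoutMS=10000)

try:
    client.logdb.logs.insert_one({"uid": 200001, "value": "hogehoge"})
except (AutoReconnect, ServerSelectionTimeoutError) as e:
    # Fail fast instead of hanging; retry or alert from here.
    print("insert failed:", e)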

Recovering db2 from the failure

[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_A.conf                                                             
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_B.conf 
[root@db2 mongodb]# ./bin/mongod -f conf/rs_shard_C.conf 
[root@db2 mongodb]# pgrep -lf mongo
4015 ./bin/mongod -f conf/rs_shard_A.conf
4090 ./bin/mongod -f conf/rs_shard_B.conf
4164 ./bin/mongod -f conf/rs_shard_C.conf