graph TD
    %% Traditional sharding path: routing lives in the application layer,
    %% which in turn makes cross-database JOINs difficult.
    A[传统分库分表] --> B[应用层维护路由] --> C[跨库JOIN困难]

    %% TiDB path: one SQL interface, then automatic horizontal sharding,
    %% then distributed transaction support.
    D[TiDB] --> E[统一SQL接口] --> F[自动水平分片] --> G[分布式事务支持]
// HandleQuery sketches how TiDB processes a request (simplified pseudocode).
//
// The query text is turned into a plan. A plan that cannot be pushed down is
// executed locally; otherwise it is converted into a KV request, sent to the
// leader of every region covering the request's key range, and the partial
// results are merged. ctx is accepted but not used in this sketch.
func HandleQuery(ctx context.Context, query string) Result {
	// Step 1: parse and optimize the SQL text.
	queryPlan := optimizer.BuildPlan(query)

	// Step 2: decide whether the computation can be pushed down.
	// If it cannot, fall back to local execution right away.
	if !canPushDown(queryPlan) {
		return localExecute(queryPlan)
	}

	request := convertToKVRequest(queryPlan)
	targets := pd.LocateRegions(request.KeyRange)

	// Step 3: access TiKV in parallel, one call per region leader.
	partials := parallelExecute(targets, func(region) {
		return tikvClient.Send(region.Leader, request)
	})
	return mergeResults(partials)
}
// TiKV的MVCC存储结构示例(基于Rust实现)
pubstruct MvccReader {
snapshot: Snapshot,
start_ts: u64,
}
impl MvccReader {
pubfn get(&self, key: &[u8]) -> Result<Option<Value>> {
let lock = self.snapshot.get_lock(key)?;
ifletSome(lock) = lock {
if lock.ts <= self.start_ts {
returnErr(Error::KeyIsLocked);
}
}
// 读取历史版本
letmut iter = self.snapshot.iter(key);
whileletSome((ts, value)) = iter.next()? {
if ts <= self.start_ts {
returnOk(Some(value));
}
}
Ok(None)
}
}
# TiKV multi-Raft group scheduling algorithm (pseudocode)
class RaftScheduler:
    """Drives heartbeats for a set of Raft groups and moves leaders off busy nodes.

    Attributes are plain containers filled in by the caller:
    ``groups`` maps region_id -> raft_group, and ``busy_nodes`` is the set of
    nodes currently considered busy.
    """

    def __init__(self):
        self.groups = {}  # region_id -> raft_group
        self.busy_nodes = set()

    def schedule_heartbeat(self):
        """Send a heartbeat for every group, or rebalance it if its leader is busy."""
        for group in self.groups.values():
            if group.leader_node not in self.busy_nodes:
                group.send_heartbeat()
            else:
                # The leader sits on a busy node: hand leadership elsewhere
                # instead of sending the heartbeat from it.
                self.rebalance_leader(group)

    def rebalance_leader(self, group):
        """Transfer leadership of ``group`` to the follower picked by
        ``find_least_loaded_follower`` (defined outside this snippet)."""
        new_leader = find_least_loaded_follower(group)
        group.transfer_leadership(new_leader)
// Java应用使用悲观事务
try (Connection conn = ds.getConnection()) {
conn.setAutoCommit(false);
// 1. 开启悲观事务模式
conn.createStatement().execute("SET tidb_txn_mode = 'pessimistic'");
// 2. 先查询后更新(带锁)
ResultSet rs = conn.createStatement().executeQuery(
"SELECT balance FROM accounts WHERE id = 1001 FOR UPDATE");
// 3. 业务逻辑处理
BigDecimal newBalance = rs.getBigDecimal(1).subtract(amount);
PreparedStatement ps = conn.prepareStatement(
"UPDATE accounts SET balance = ? WHERE id = 1001");
ps.setBigDecimal(1, newBalance);
ps.executeUpdate();
conn.commit();
}
-- Create columnar (TiFlash) replicas
ALTER TABLE orders SET TIFLASH REPLICA 2;
-- Force the read to go through TiFlash (TPC-H Query 6 optimization)
-- NOTE(review): the replica above is created on `orders`, while this query
-- reads `lineitem`; the hint presumably needs a TiFlash replica on `lineitem`
-- as well — confirm which table was intended.
SELECT /*+ read_from_storage(tiflash[lineitem]) */
    sum(l_extendedprice * l_discount) as revenue
FROM lineitem
WHERE l_shipdate >= '1994-01-01'
  AND l_shipdate < date_add('1994-01-01', interval '1' year)
  AND l_discount between 0.06 - 0.01 AND 0.06 + 0.01
  AND l_quantity < 24;
# Example tidb-server configuration
# NOTE(review): nesting was reconstructed from a flattened original; verify
# the key names and layout against the target TiDB version before use.
resource-control:
  request-unit:
    # Cap the OLTP workload
    oltp:
      max-tasks: 500
      cpu-time-per-sec: 0.8
    # Guarantee resources for OLAP
    olap:
      min-tasks: 200
      cpu-time-per-sec: 1.2
Region A(主中心)                    Region B(灾备中心)
├── 3 PD节点(多数派)                ├── 2 PD Learner
├── 10 TiKV节点(标签 zone=a)        ├── 5 TiKV节点(标签 zone=b)
└── 2 TiDB节点                       └── 1 TiDB节点
# Pseudocode for PD's region scheduling strategy
def handle_network_partition():
    """Repeatedly scan all regions and react to under-replicated ones.

    For each region with fewer available replicas than ``replication_factor``:
    if it still has a quorum in the primary zone, the secondary zones are
    downgraded; otherwise an emergency repair is triggered. Regions with
    enough replicas are left alone.

    The loop has no exit condition, so this function never returns. The
    helpers and ``replication_factor`` are defined outside this snippet.
    """
    while True:
        regions = get_all_regions()
        for region in regions:
            if len(region.available_replicas) < replication_factor:
                if region.has_quorum_in_primary_zone():
                    downgrade_secondary_zones()
                else:
                    trigger_emergency_repair()
# Migrate data with the DM tool.
# Launches dm-worker for source "mysql-01" with the task definition in ./task.yaml.
# NOTE(review): the --meta URL embeds a username and password in the command
# line, where they are visible in shell history and process listings — confirm
# this is acceptable outside of an example.
./dm-worker \
--source-id="mysql-01" \
--meta="mysql://user:pass@dm-meta:3306" \
--config=./task.yaml
# Example task.yaml
# NOTE(review): nesting was reconstructed from a flattened original; verify
# key names and layout against the DM task configuration reference.
name: ecommerce-migration
task-mode: all
target-database:
  host: "tidb-cluster"
  port: 4000
mysql-instances:
  - source-id: "mysql-01"
    block-allow-list: "bw-rule-1"
    # NOTE(review): assumed to be a per-instance setting because it follows
    # the instance's other keys — confirm it is not meant to be top-level.
    loader-config:
      pool-size: 16
      dir: "./dumped_data"
# Example TiDB Operator Helm configuration
# NOTE(review): nesting was reconstructed from a flattened original; verify
# against the chart's values schema.
tidb:
  clusterVersion: "v7.0.0"
  config:
    enable-local-pd: true
  # NOTE(review): assumed to sit beside `config` rather than inside it — confirm.
  storageClassName: "ebs-ssd"
完整文章配套资源:
《分布式数据库TiDB:原理、优化与架构设计》

【内容简介】
本书以TiDB数据库为基础介绍分布式数据库的运行原理、性能优化和应用场景架构设计。首先,剖析分布式数据库的运行原理与架构;然后,阐述分布式数据库TiDB在表与索引的设计优化、SQL优化、系统级优化方面的方法论,通过融入多个有代表性的案例,帮助读者将方法论对应到生产实践中;最后,梳理场景选型和架构设计过程中读者应该掌握的主要知识点,并对一些分布式数据库的优势场景进行了详细介绍。