1 测试代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
| int main() { rocksdb::Options options; options.create_if_missing = true;
std::string dbName = "/tmp/rocksdb_ctest_read"; std::string walDir = dbName + "/wal"; std::string sstDir = dbName + "/sst"; options.wal_dir = walDir; std::vector<rocksdb::DbPath> sstPaths = {{sstDir + "/flash_path", 512}, {sstDir + "/hard_drive", 1024}}; options.db_paths = sstPaths;
auto* env = rocksdb::Env::Default(); env->CreateDirIfMissing(dbName); env->CreateDirIfMissing(walDir); env->CreateDirIfMissing(sstDir); env->CreateDirIfMissing(sstDir + "/flash_path"); env->CreateDirIfMissing(sstDir + "/hard_drive");
std::unique_ptr<rocksdb::DB> db; auto s = rocksdb::DB::Open(options, dbName, &db); assert(s.ok()); s = db->Put(rocksdb::WriteOptions(), "hello", "world"); assert(s.ok()); std::string val; s = db->Get(rocksdb::ReadOptions(), "hello", &val); assert(s.ok()); std::cout<<"key=hello, value="<<val<<std::endl; return 0; }
|
2 关于字符串
1 2 3 4 5 6 7 8 9 10 11 12 13 14
| virtual inline Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family, const Slice& key, std::string* value) final { assert(value != nullptr); PinnableSlice pinnable_val(value); assert(!pinnable_val.IsPinned()); auto s = Get(options, column_family, key, &pinnable_val); if (s.ok() && pinnable_val.IsPinned()) { value->assign(pinnable_val.data(), pinnable_val.size()); } return s; }
|
虽然这里用的是std::string,名字里带着string,但不能狭隘地把它理解成字符串,而应该理解为字节序列。针对string,RocksDB结合自己的使用场景做了两个优化
3 构造LookupKey
LookupKey存在的目的是服务于查询:它把查询条件(user key、可选的时间戳以及sequence number)编码到了一起
想象一下现在需要在有序集合中查询Seek(key = (a, snapshot)),转换成的语义是
1 2 3
| WHERE key = a AND sequence <= snapshot ORDER BY sequence DESC LIMIT 1
|
直接看内存布局
它的三个成员
1 2 3
| const char* start_;   // Start of the encoded buffer, where the varint32 length prefix is written.
const char* kstart_;  // First byte after the length prefix, where the user key bytes are copied.
const char* end_;     // One past the trailing 8-byte fixed64 holding the packed sequence number and type.
|
它的构造函数
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
|
// Encodes a lookup target into one contiguous buffer laid out as
//   [varint32 length][user key][optional timestamp][fixed64 seq+type]
// where the length field counts user key + timestamp + the 8-byte trailer.
//
// _user_key  user key bytes to search for
// s          sequence number packed into the trailer with kValueTypeForSeek
// ts         optional timestamp appended after the user key; may be nullptr
LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
                     const Slice* ts) {
  const size_t key_len = _user_key.size();
  const size_t ts_len = (ts != nullptr) ? ts->size() : 0;

  // 13 extra bytes: presumably a worst-case 5-byte varint32 prefix plus the
  // 8-byte trailer -- confirm against the varint encoder.
  const size_t needed = key_len + ts_len + 13;

  // Use the inline buffer when it is large enough, otherwise allocate.
  // NOTE(review): the heap buffer is presumably released by the destructor,
  // which is not visible here.
  char* cursor = (needed <= sizeof(space_)) ? space_ : new char[needed];

  start_ = cursor;
  cursor = EncodeVarint32(cursor, static_cast<uint32_t>(key_len + ts_len + 8));

  kstart_ = cursor;
  memcpy(cursor, _user_key.data(), key_len);
  cursor += key_len;

  if (ts != nullptr) {
    memcpy(cursor, ts->data(), ts_len);
    cursor += ts_len;
  }

  EncodeFixed64(cursor, PackSequenceAndType(s, kValueTypeForSeek));
  end_ = cursor + 8;
}
|
从上面的流程可以看到,检索时需要把seq作为key的检索条件之一,这个seq正是MVCC读隔离发挥威力的地方,参见《RocksDB源码-0x11-MVCC》
4 从快照里面读数据
在真正读数据之前,非常重要的一步是拿到数据的快照,参见《RocksDB源码-0x19-列簇的快照》
4.1 先从MemTable里面读
RocksDB源码-0x12-内存数据MemoryTable
4.2 MemTable在落盘过程中从冻结的MemTable里面读
RocksDB源码-0x1A-冻结的MemTable
4.3 SST查找
RocksDB源码-0x0C-Version