From: Patrick Schönberger Date: Sun, 26 May 2024 16:11:31 +0000 (+0200) Subject: implement object deltas X-Git-Url: https://gitweb.ps.run/ziggit/commitdiff_plain/e3543b53185b75ba1e315974f4fe6d202fae3be9?ds=sidebyside implement object deltas --- diff --git a/git.zig b/git.zig index fe82870..7d0ccdb 100644 --- a/git.zig +++ b/git.zig @@ -1,28 +1,46 @@ const std = @import("std"); const Alloc = std.mem.Allocator; +const Reader = std.io.AnyReader; +const Writer = std.io.AnyWriter; const Id = u160; const Commit = struct { - author: std.BoundedArray(u8, 64), - message: std.BoundedArray(u8, 1024), + author: []u8, + message: []u8, parent: Id, tree: Id, }; const Blob = struct { - data: std.BoundedArray(u8, 1024), + data: []u8, }; const Object = struct { - alloc: Alloc, + kind: u3, + data: []u8, + pub fn init(kind: u3, data: []u8) Object { + return .{ + .kind = kind, + .data = data, + }; + } // pub fn getCommit(self: *Object) Commit {} // pub fn getBlob(self: *Object) Blob {} }; + const PackFile = struct { alloc: Alloc, + idxFile: std.fs.File, + pckFile: std.fs.File, + objectOffsets: std.AutoArrayHashMap(Id, u32), pub fn open(alloc: Alloc, dir: std.fs.Dir) !PackFile { - var self = PackFile{ .alloc = alloc }; + var self = PackFile{ + .alloc = alloc, + .idxFile = undefined, + .pckFile = undefined, + .objectOffsets = std.AutoArrayHashMap(Id, u32).init(alloc), + }; var packDir = try dir.openDir("objects/pack", .{ .iterate = true }); defer packDir.close(); @@ -38,33 +56,241 @@ const PackFile = struct { .{idxFilename[0 .. idxFilename.len - 4]}, ); - const idxFile = try packDir.openFile(idxFilename, .{}); - const pckFile = try packDir.openFile(pckFilename.constSlice(), .{}); - defer idxFile.close(); - defer pckFile.close(); + self.idxFile = try packDir.openFile(idxFilename, .{}); + self.pckFile = try packDir.openFile(pckFilename.constSlice(), .{}); - const idxReader = idxFile.reader().any(); - const pckReader = pckFile.reader().any(); - - try self.parse(idxReader, pckReader); + try self.parseIndex(); } } return self; } - pub fn parse(self: *PackFile, idxReader: std.io.AnyReader, pckReader: std.io.AnyReader) !void { - _ = self; - var buffer: [16]u8 = undefined; - _ = try idxReader.read(&buffer); - std.debug.print("{s}\n", .{&buffer}); - _ = try pckReader.read(&buffer); - std.debug.print("{s}\n", .{&buffer}); + + pub fn close(self: *PackFile) void { + self.objectOffsets.deinit(); + self.idxFile.close(); + self.pckFile.close(); + } + + pub fn parseIndex(self: *PackFile) !void { + const idxReader = self.idxFile.reader().any(); + + var fanoutTable: [256]u32 = undefined; + + for (0..256) |i| { + try self.idxFile.seekTo(8 + i * 4); + fanoutTable[i] = try idxReader.readVarInt(u32, .big, 4); + + const numObjects = + if (i > 0) fanoutTable[i] - fanoutTable[i - 1] else fanoutTable[i]; + + for (0..numObjects) |j| { + const idOffset = + 4 + 4 + 4 * 256 + (j + if (i > 0) fanoutTable[i - 1] else 0) * 20; + try self.idxFile.seekTo(idOffset); + const id = try idxReader.readVarInt(Id, .big, 20); + + try self.objectOffsets.put(id, 0); + } + } + + const numObjects = self.objectOffsets.keys().len; + for (0..numObjects) |i| { + const offsetOffset = + 4 + 4 + 4 * 256 + numObjects * (20 + 4) + i * 4; + try self.idxFile.seekTo(offsetOffset); + const offset = try idxReader.readVarInt(u32, .big, 4); + + self.objectOffsets.values()[i] = offset; + } + } + + fn getSize(reader: Reader, ignoreTypeBits: bool) !struct { size: u64, bytelen: u64 } { + var size: u64 = 0; + var counter: u6 = 0; + while (true) { + const byte = try reader.readByte(); + + if (counter == 0) { + if (ignoreTypeBits) { + const bits: u4 = @truncate(byte); + size = bits; + } else { + const bits: u7 = @truncate(byte); + size = bits; + } + } else { + if (ignoreTypeBits) { + const bits: u7 = @truncate(byte); + size += @as(u64, bits) << (7 * (counter - 1) + 4); + } else { + const bits: u7 = @truncate(byte); + size += @as(u64, bits) << (7 * (counter)); + } + } + + if (byte & 0b10000000 == 0) { + break; + } + + counter += 1; + } + + const nBytes = counter + 1; + + return .{ + .size = size, + .bytelen = nBytes, + }; + } + + fn getOffset(reader: Reader) !struct { offset: u64, bytelen: u64 } { + var offset: u64 = 0; + var counter: u4 = 0; + while (true) { + const byte = try reader.readByte(); + + const bits: u7 = @truncate(byte); + offset <<= 7; + offset += @as(u64, bits); + + if (byte & 0b10000000 == 0) { + break; + } + + counter += 1; + } + + const nBytes = counter + 1; + + if (nBytes >= 2) { + for (1..nBytes) |i| { + offset += std.math.pow(u64, 2, 7 * i); + } + } + return .{ + .offset = offset, + .bytelen = nBytes, + }; + } + + fn decompress(alloc: Alloc, reader: Reader, size: usize) ![]u8 { + const outBuffer = try alloc.alloc(u8, size); + errdefer alloc.free(outBuffer); + + var outFbs = std.io.fixedBufferStream(outBuffer); + const writer = outFbs.writer(); + + try std.compress.zlib.decompress(reader, writer); + + return outBuffer; } - // pub fn init(alloc: Alloc, path: []const u8) PackFile {} - // pub fn deinit(self: *PackFile) void {} - // pub fn getObject(self: *PackFile, id: Id) Object {} + fn applyDelta(alloc: Alloc, baseData: []const u8, deltData: []const u8) ![]u8 { + var fbs = std.io.fixedBufferStream(deltData); + const deltDataReader = fbs.reader().any(); + const baseObjectSize = try getSize(deltDataReader, false); + const resultObjectSize = try getSize(deltDataReader, false); + const deltaDataOffset = baseObjectSize.bytelen + resultObjectSize.bytelen; + + const result = try alloc.alloc(u8, resultObjectSize.size); + var resultCounter: u64 = 0; + + var counter: u64 = 0; + while (true) { + const b = deltData[deltaDataOffset + counter]; + + if (b & 0b10000000 != 0) { + var dataOffset: u64 = 0; + var dataSize: u64 = 0; + var bitsSet: u8 = 0; + for (0..4) |i| { // offset bits + if (b & (@as(u64, 1) << @min(3, i)) != 0) { + dataOffset += @as(u64, deltData[deltaDataOffset + counter + 1 + bitsSet]) << @min(3 * 8, i * 8); + bitsSet += 1; + } + } + for (4..7) |i| { // size bits + if (b & (@as(u64, 1) << @min(6, i)) != 0) { + dataSize += @as(u64, deltData[deltaDataOffset + counter + 1 + bitsSet]) << @min(6 * 8, (i - 4) * 8); + bitsSet += 1; + } + } + counter += bitsSet; + + std.mem.copyForwards( + u8, + result[resultCounter..result.len], + baseData[dataOffset .. dataOffset + dataSize], + ); + + resultCounter += dataSize; + } else { + const dataSize: u7 = @truncate(b); + + std.mem.copyForwards( + u8, + result[resultCounter..result.len], + deltData[deltaDataOffset + counter + 1 .. deltaDataOffset + counter + 1 + dataSize], + ); + resultCounter += dataSize; + counter += dataSize; + } + + counter += 1; + if (deltaDataOffset + counter >= deltData.len) + break; + } + + return result; + } + + fn ofsDelta(self: *PackFile, offset: i64, size: usize) anyerror!Object { + const pckReader = self.pckFile.reader().any(); + + const pos = try self.pckFile.getPos(); + + try self.pckFile.seekBy(-offset); + const baseObject = try self.readObject(pckReader); + defer self.alloc.free(baseObject.data); + + try self.pckFile.seekTo(pos); + const deltaData = try decompress(self.alloc, pckReader, size); + defer self.alloc.free(deltaData); + + const objectData = try applyDelta(self.alloc, baseObject.data, deltaData); + return Object.init(baseObject.kind, objectData); + } + + fn readObject(self: *PackFile, reader: Reader) anyerror!Object { + const firstByte = try reader.readByte(); + const objectKind: u3 = @truncate(firstByte >> 4); + try self.pckFile.seekBy(-1); + const objectSize = try getSize(reader, true); + + if (objectKind == 6) { + const offset = try getOffset(reader); + return try self.ofsDelta( + @intCast(offset.offset + objectSize.bytelen + offset.bytelen), + objectSize.size, + ); + } else { + const objectData = try decompress(self.alloc, reader, objectSize.size); + return Object.init(objectKind, objectData); + } + } + + pub fn getObject(self: *PackFile, id: Id) !?Object { + if (self.objectOffsets.get(id)) |offset| { + const pckReader = self.pckFile.reader().any(); + try self.pckFile.seekTo(offset); + + return try self.readObject(pckReader); + } + return null; + } }; + const Repo = struct { alloc: Alloc, dir: std.fs.Dir, @@ -81,9 +307,12 @@ const Repo = struct { .packfile = packfile, }; } + pub fn close(self: *Repo) void { self.dir.close(); + self.packfile.close(); } + pub fn getHead(self: *Repo) !Id { // read file HEAD const head = try self.dir.readFileAlloc(self.alloc, "HEAD", 1024); @@ -97,7 +326,10 @@ const Repo = struct { // parse id from file return try std.fmt.parseUnsigned(u160, idStr, 16); } - // pub fn getObject(self: *Repo, id: Id) Object {} + + pub fn getObject(self: *Repo, id: Id) !?Object { + return self.packfile.getObject(id); + } }; test "print HEAD" { @@ -109,6 +341,37 @@ test "print HEAD" { std.debug.print("HEAD: {}\n", .{head}); } +test "parse idx" { + var repo = try Repo.open(std.testing.allocator, "../microwindows/.git"); + defer repo.close(); + + std.debug.print("{}\n", .{repo.packfile.objectOffsets.keys().len}); + std.debug.print("{}\n", .{repo.packfile.objectOffsets.values().len}); +} + +test "get object" { + var repo = try Repo.open(std.testing.allocator, "../microwindows/.git"); + defer repo.close(); + + const head = try repo.getHead(); + + if (try repo.getObject(head)) |o| { + defer std.testing.allocator.free(o.data); + + std.debug.print("object: {s}\n", .{o.data}); + } +} + +test "get tree" { + var repo = try Repo.open(std.testing.allocator, "../microwindows/.git"); + defer repo.close(); + + if (try repo.getObject(0xe59b68a950b643f9ea50997b3cf359a5956e852c)) |o| { + defer std.testing.allocator.free(o.data); + + std.debug.print("tree: {s}\n", .{o.data}); + } +} // test "list commits" { // var repo = Repo.open(std.testing.allocator, "../microwindows/.git"); // defer repo.close();