//! Minimal read-only access to a git repository.
//!
//! Objects are looked up in a single packfile under `objects/pack`; loose
//! objects and repositories with several packs are not supported. Object ids
//! are SHA-1 names stored as 160-bit integers.

const std = @import("std");
const Alloc = std.mem.Allocator;
const Reader = std.io.AnyReader;
const Writer = std.io.AnyWriter;

const MaxFileSize = 1024 * 1024;

/// A SHA-1 object name interpreted as a big-endian 160-bit integer.
const Id = u160;

/// Parsed commit object. All slices point into the `Object.data` buffer the
/// commit was parsed from and are only valid while that buffer is alive.
const Commit = struct {
    tree: Id,
    /// First parent of the commit, or 0 when the commit has no parent line.
    parent: Id,
    /// The whole author header line, including the leading "author ".
    author: []u8,
    /// The whole committer header line, including the leading "committer ".
    committer: []u8,
    /// Everything that follows the committer line.
    message: []u8,
};

/// One entry of a tree object. Slices point into the parsed `Object.data`.
const TreeEntry = struct {
    permissions: []u8,
    name: []u8,
    id: Id,
};

const Tree = std.ArrayList(TreeEntry);

const Blob = struct {
    data: []u8,
};

const ParsedObject = union(enum) {
    c: Commit,
    t: Tree,
    b: Blob,
};

/// A raw (already inflated) git object together with its pack type code
/// (1 = commit, 2 = tree, 3 = blob, 4 = tag).
const Object = struct {
    kind: u3,
    data: []u8,

    pub fn init(kind: u3, data: []u8) Object {
        return .{
            .kind = kind,
            .data = data,
        };
    }

    /// Interprets `data` according to `kind`.
    ///
    /// Commits and blobs borrow from `self.data`. Trees additionally allocate
    /// the entry list with `alloc`; the caller must `deinit` it.
    /// Returns `error.TagNotImplemented` for tags and
    /// `error.UnknownGitObjectType` for any other kind.
    pub fn parse(self: Object, alloc: Alloc) !ParsedObject {
        switch (self.kind) {
            1 => return .{ .c = try parseCommit(self.data) },
            2 => return .{ .t = try parseTree(alloc, self.data) },
            3 => return .{ .b = Blob{ .data = self.data } },
            4 => return error.TagNotImplemented,
            else => return error.UnknownGitObjectType,
        }
    }

    /// Parses the header lines of a commit: "tree <hex>", an optional
    /// "parent <hex>", then the author and committer lines.
    fn parseCommit(data: []u8) !Commit {
        const idHexLen = 40;
        const treePrefix = "tree ";
        const parentPrefix = "parent ";
        const treeEnd = treePrefix.len + idHexLen;

        if (data.len <= treeEnd or
            !std.mem.startsWith(u8, data, treePrefix) or
            data[treeEnd] != '\n')
        {
            return error.InvalidCommitFormat;
        }
        const tree = try std.fmt.parseUnsigned(Id, data[treePrefix.len..treeEnd], 16);

        // A root commit has no parent line; only the first parent of a merge
        // commit is reported.
        const rest = data[treeEnd + 1 ..];
        const parent: Id = if (std.mem.startsWith(u8, rest, parentPrefix)) blk: {
            if (rest.len < parentPrefix.len + idHexLen) return error.InvalidCommitFormat;
            const hex = rest[parentPrefix.len .. parentPrefix.len + idHexLen];
            break :blk try std.fmt.parseUnsigned(Id, hex, 16);
        } else 0;

        const authorOffset = std.mem.indexOf(u8, data, "author ") orelse
            return error.InvalidCommitFormat;
        const authorNewline = std.mem.indexOfScalarPos(u8, data, authorOffset, '\n') orelse
            return error.InvalidCommitFormat;
        // Search after the author line so text inside it cannot be mistaken
        // for the committer header.
        const committerOffset = std.mem.indexOfPos(u8, data, authorNewline, "committer ") orelse
            return error.InvalidCommitFormat;
        const committerNewline = std.mem.indexOfScalarPos(u8, data, committerOffset, '\n') orelse
            return error.InvalidCommitFormat;

        return Commit{
            .tree = tree,
            .parent = parent,
            .author = data[authorOffset..authorNewline],
            .committer = data[committerOffset..committerNewline],
            .message = data[committerNewline + 1 ..],
        };
    }

    /// Parses a sequence of "<mode> <name>\0<20 raw id bytes>" entries.
    /// An empty buffer yields an empty tree.
    fn parseTree(alloc: Alloc, data: []u8) !Tree {
        const rawIdLen = 20;

        var t = Tree.init(alloc);
        errdefer t.deinit();

        var offset: usize = 0;
        while (offset < data.len) {
            const spaceOffset = std.mem.indexOfScalarPos(u8, data, offset, ' ') orelse
                return error.InvalidTreeFormat;
            const zeroOffset = std.mem.indexOfScalarPos(u8, data, spaceOffset, 0) orelse
                return error.InvalidTreeFormat;

            const idStart = zeroOffset + 1;
            if (data.len - idStart < rawIdLen) return error.InvalidTreeFormat;
            const idEnd = idStart + rawIdLen;

            try t.append(.{
                .permissions = data[offset..spaceOffset],
                .name = data[spaceOffset + 1 .. zeroOffset],
                .id = std.mem.readVarInt(Id, data[idStart..idEnd], .big),
            });
            offset = idEnd;
        }
        return t;
    }

    // TODO: pub fn getCommit(self: *Object) Commit {}
    // TODO: pub fn getBlob(self: *Object) Blob {}
};

/// Inflates one zlib stream from `r`. Caller owns the returned slice.
fn decompress(alloc: Alloc, r: Reader) ![]u8 {
    var buffer = std.ArrayList(u8).init(alloc);
    errdefer buffer.deinit();
    try std.compress.zlib.decompress(r, buffer.writer().any());
    return buffer.toOwnedSlice();
}

/// A single pack (`.pack`) with its version 2 index (`.idx`).
const PackFile = struct {
    alloc: Alloc,
    idxFile: std.fs.File,
    pckFile: std.fs.File,
    /// Maps object id to the byte offset of its entry in `pckFile`.
    objectOffsets: std.AutoArrayHashMap(Id, u32),

    /// Opens the first `*.idx` / `*.pack` pair found in `objects/pack` below
    /// `dir` and loads its index. Returns null when the directory holds no
    /// index file. Only one pack is opened; objects stored in any further
    /// packs are not visible.
    pub fn open(alloc: Alloc, dir: std.fs.Dir) !?PackFile {
        var packDir = try dir.openDir("objects/pack", .{ .iterate = true });
        defer packDir.close();

        var packIt = packDir.iterate();
        while (try packIt.next()) |f| {
            if (!std.mem.endsWith(u8, f.name, ".idx")) continue;

            const stem = f.name[0 .. f.name.len - ".idx".len];
            var nameBuffer: [std.fs.max_path_bytes]u8 = undefined;
            const pckFilename = try std.fmt.bufPrint(&nameBuffer, "{s}.pack", .{stem});

            const idxFile = try packDir.openFile(f.name, .{});
            errdefer idxFile.close();
            const pckFile = try packDir.openFile(pckFilename, .{});
            errdefer pckFile.close();

            var self = PackFile{
                .alloc = alloc,
                .idxFile = idxFile,
                .pckFile = pckFile,
                .objectOffsets = std.AutoArrayHashMap(Id, u32).init(alloc),
            };
            errdefer self.objectOffsets.deinit();

            try self.parseIndex();
            return self;
        }
        return null;
    }

    pub fn close(self: *PackFile) void {
        self.objectOffsets.deinit();
        self.idxFile.close();
        self.pckFile.close();
    }

    /// (Re)loads `objectOffsets` from `idxFile`.
    ///
    /// Expects the version 2 layout: 8 byte header, 256 entry fanout table,
    /// sorted 20 byte ids, 4 byte CRCs, 4 byte offsets. Entries that refer
    /// to the 64-bit offset table are rejected with
    /// `error.PackOffsetTooLarge`.
    pub fn parseIndex(self: *PackFile) !void {
        const idxMagic: u32 = 0xff744f63; // "\xfftOc"
        const headerLen = 4 + 4;
        const fanoutLen = 4 * 256;
        const idLen = 20;
        const crcLen = 4;

        // Keep the concrete reader in a local so the pointer stored in the
        // type-erased reader stays valid for the whole function.
        const fileReader = self.idxFile.reader();
        const idxReader = fileReader.any();

        self.objectOffsets.clearRetainingCapacity();

        try self.idxFile.seekTo(0);
        const magic = try idxReader.readVarInt(u32, .big, 4);
        const version = try idxReader.readVarInt(u32, .big, 4);
        if (magic != idxMagic) return error.UnsupportedIndexFormat;
        if (version != 2) return error.UnsupportedIndexVersion;

        // The last fanout entry is the total number of objects.
        try self.idxFile.seekTo(headerLen + 255 * 4);
        const numObjects: usize = try idxReader.readVarInt(u32, .big, 4);
        try self.objectOffsets.ensureTotalCapacity(numObjects);

        // Ids are stored back to back, so one seek is enough.
        try self.idxFile.seekTo(headerLen + fanoutLen);
        for (0..numObjects) |_| {
            const id = try idxReader.readVarInt(Id, .big, idLen);
            const entry = try self.objectOffsets.getOrPut(id);
            if (entry.found_existing) return error.InvalidIndexFormat;
            entry.value_ptr.* = 0;
        }

        // The map preserves insertion order, so the i-th value slot belongs
        // to the i-th id and therefore to the i-th offset entry.
        const offsetTable = headerLen + fanoutLen + @as(u64, numObjects) * (idLen + crcLen);
        try self.idxFile.seekTo(offsetTable);
        for (self.objectOffsets.values()) |*slot| {
            const offset = try idxReader.readVarInt(u32, .big, 4);
            if (offset & 0x80000000 != 0) return error.PackOffsetTooLarge;
            slot.* = offset;
        }
    }

    /// Reads a little-endian base-128 size. With `ignoreTypeBits` the first
    /// byte only contributes its low 4 bits (pack entry header); otherwise it
    /// contributes 7 bits (delta header). `bytelen` is the number of bytes
    /// consumed. Returns `error.SizeOverflow` when the value exceeds 64 bits.
    fn getSize(reader: Reader, ignoreTypeBits: bool) !struct { size: u64, bytelen: u64 } {
        var byte = try reader.readByte();
        var size: u64 = if (ignoreTypeBits) byte & 0x0f else byte & 0x7f;
        var shift: u7 = if (ignoreTypeBits) 4 else 7;
        var bytelen: u64 = 1;

        while (byte & 0x80 != 0) {
            if (shift >= 64) return error.SizeOverflow;
            byte = try reader.readByte();

            const amount: u6 = @intCast(shift);
            const chunk: u64 = byte & 0x7f;
            const shifted = chunk << amount;
            if (shifted >> amount != chunk) return error.SizeOverflow;

            size |= shifted;
            shift += 7;
            bytelen += 1;
        }

        return .{
            .size = size,
            .bytelen = bytelen,
        };
    }

    /// Reads the backwards distance of an ofs-delta entry: big-endian 7-bit
    /// groups where every continuation adds one before shifting. `bytelen` is
    /// the number of bytes consumed.
    fn getOffset(reader: Reader) !struct { offset: u64, bytelen: u64 } {
        var byte = try reader.readByte();
        var offset: u64 = byte & 0x7f;
        var bytelen: u64 = 1;

        while (byte & 0x80 != 0) {
            // (offset + 1) << 7 must still fit into 64 bits.
            if (offset >= std.math.maxInt(u64) >> 7) return error.OffsetOverflow;
            byte = try reader.readByte();
            offset = ((offset + 1) << 7) | (byte & 0x7f);
            bytelen += 1;
        }

        return .{
            .offset = offset,
            .bytelen = bytelen,
        };
    }

    /// Applies the git delta `deltData` to `baseData` and returns the
    /// reconstructed object. Caller owns the returned slice.
    ///
    /// Returns `error.DeltaBaseSizeMismatch` when the delta was made for a
    /// base of a different length and `error.InvalidDeltaFormat` when an
    /// instruction is truncated, reads outside the base, or the output does
    /// not match the announced result size.
    fn applyDelta(alloc: Alloc, baseData: []const u8, deltData: []const u8) ![]u8 {
        var fbs = std.io.fixedBufferStream(deltData);
        const fbsReader = fbs.reader();
        const deltDataReader = fbsReader.any();

        const baseObjectSize = try getSize(deltDataReader, false);
        const resultObjectSize = try getSize(deltDataReader, false);
        if (baseObjectSize.size != baseData.len) return error.DeltaBaseSizeMismatch;
        const resultLen = std.math.cast(usize, resultObjectSize.size) orelse
            return error.InvalidDeltaFormat;

        const result = try alloc.alloc(u8, resultLen);
        errdefer alloc.free(result);

        var written: usize = 0;
        var cursor: usize = @intCast(baseObjectSize.bytelen + resultObjectSize.bytelen);
        while (cursor < deltData.len) {
            const op = deltData[cursor];
            cursor += 1;

            if (op & 0x80 != 0) {
                // Copy from base: bits 0-3 select which offset bytes follow,
                // bits 4-6 select which size bytes follow (little endian).
                var copyOffset: u64 = 0;
                inline for (0..4) |i| {
                    if (op & (1 << i) != 0) {
                        if (cursor >= deltData.len) return error.InvalidDeltaFormat;
                        copyOffset |= @as(u64, deltData[cursor]) << (8 * i);
                        cursor += 1;
                    }
                }
                var copySize: u64 = 0;
                inline for (0..3) |i| {
                    if (op & (1 << (4 + i)) != 0) {
                        if (cursor >= deltData.len) return error.InvalidDeltaFormat;
                        copySize |= @as(u64, deltData[cursor]) << (8 * i);
                        cursor += 1;
                    }
                }
                // A size of zero encodes 0x10000.
                if (copySize == 0) copySize = 0x10000;

                if (copyOffset > baseData.len or copySize > baseData.len - copyOffset)
                    return error.InvalidDeltaFormat;
                if (copySize > result.len - written) return error.InvalidDeltaFormat;

                const from: usize = @intCast(copyOffset);
                const len: usize = @intCast(copySize);
                @memcpy(result[written..][0..len], baseData[from..][0..len]);
                written += len;
            } else if (op != 0) {
                // Insert: the next `op` bytes of the delta are literal data.
                const len: usize = op;
                if (len > deltData.len - cursor or len > result.len - written)
                    return error.InvalidDeltaFormat;
                @memcpy(result[written..][0..len], deltData[cursor..][0..len]);
                cursor += len;
                written += len;
            } else {
                // Opcode 0 is reserved.
                return error.InvalidDeltaFormat;
            }
        }

        if (written != result.len) return error.InvalidDeltaFormat;
        return result;
    }

    /// Rebuilds a deltified object. Must be called with `pckFile` positioned
    /// at the start of the compressed delta data; `basePos` is the absolute
    /// position of the base entry in the pack.
    fn resolveDelta(self: *PackFile, basePos: u64) anyerror!Object {
        const fileReader = self.pckFile.reader();
        const pckReader = fileReader.any();
        const deltaPos = try self.pckFile.getPos();

        try self.pckFile.seekTo(basePos);
        const baseObject = try self.readObject(pckReader);
        defer self.alloc.free(baseObject.data);

        try self.pckFile.seekTo(deltaPos);
        const deltaData = try decompress(self.alloc, pckReader);
        defer self.alloc.free(deltaData);

        const objectData = try applyDelta(self.alloc, baseObject.data, deltaData);
        return Object.init(baseObject.kind, objectData);
    }

    /// Resolves an ofs-delta whose base starts `offset` bytes before the
    /// current position of `pckFile`.
    fn ofsDelta(self: *PackFile, offset: i64) anyerror!Object {
        const pos = try self.pckFile.getPos();
        if (offset <= 0) return error.InvalidDeltaOffset;
        const distance: u64 = @intCast(offset);
        if (distance > pos) return error.InvalidDeltaOffset;
        return self.resolveDelta(pos - distance);
    }

    /// Reads the pack entry at the current position of `pckFile`. `reader`
    /// must read from `pckFile`. Caller owns `data` of the returned object.
    fn readObject(self: *PackFile, reader: Reader) anyerror!Object {
        const firstByte = try reader.readByte();
        const objectKind: u3 = @truncate(firstByte >> 4);
        // Rewind so getSize sees the header from its first byte.
        try self.pckFile.seekBy(-1);
        const objectSize = try getSize(reader, true);

        switch (objectKind) {
            1, 2, 3, 4 => {
                const objectData = try decompress(self.alloc, reader);
                errdefer self.alloc.free(objectData);
                if (objectData.len != objectSize.size) return error.ObjectSizeMismatch;
                return Object.init(objectKind, objectData);
            },
            6 => {
                // The stored distance is relative to the start of this entry;
                // add the header bytes consumed so far to make it relative to
                // the current position.
                const offset = try getOffset(reader);
                const distance = std.math.cast(
                    i64,
                    offset.offset +| objectSize.bytelen +| offset.bytelen,
                ) orelse return error.InvalidDeltaOffset;
                return self.ofsDelta(distance);
            },
            7 => {
                // ref-delta: the base is named by its 20 byte id.
                const baseId = try reader.readVarInt(Id, .big, 20);
                const basePos = self.objectOffsets.get(baseId) orelse
                    return error.MissingDeltaBase;
                return self.resolveDelta(basePos);
            },
            else => return error.InvalidPackObjectType,
        }
    }

    /// Returns the object named `id`, or null when the pack does not contain
    /// it. Caller owns `data` of the returned object.
    pub fn getObject(self: *PackFile, id: Id) !?Object {
        const offset = self.objectOffsets.get(id) orelse return null;
        const fileReader = self.pckFile.reader();
        try self.pckFile.seekTo(offset);
        return try self.readObject(fileReader.any());
    }
};

/// Handle to a git directory (the `.git` folder itself).
const Repo = struct {
    alloc: Alloc,
    dir: std.fs.Dir,
    packfile: ?PackFile,

    pub fn open(alloc: Alloc, path: []const u8) !Repo {
        var dir = try std.fs.cwd().openDir(path, .{});
        errdefer dir.close();
        const packfile = try PackFile.open(alloc, dir);
        return .{
            .alloc = alloc,
            .dir = dir,
            .packfile = packfile,
        };
    }

    pub fn close(self: *Repo) void {
        self.dir.close();
        if (self.packfile) |*packfile| {
            packfile.close();
        }
    }

    /// Returns the commit id HEAD resolves to.
    ///
    /// Handles both a symbolic HEAD ("ref: <path>") whose target exists as a
    /// loose ref file and a detached HEAD that holds the id directly.
    pub fn getHead(self: *Repo) !Id {
        const refPrefix = "ref: ";
        const whitespace = " \t\r\n";

        const head = try self.dir.readFileAlloc(self.alloc, "HEAD", 1024);
        defer self.alloc.free(head);
        const content = std.mem.trim(u8, head, whitespace);

        if (!std.mem.startsWith(u8, content, refPrefix)) {
            return parseHexId(content);
        }

        // read file pointed at by HEAD
        const headPath = std.mem.trim(u8, content[refPrefix.len..], whitespace);
        var idBuffer: [64]u8 = undefined;
        const idStr = try self.dir.readFile(headPath, &idBuffer);
        return parseHexId(std.mem.trim(u8, idStr, whitespace));
    }

    /// Parses exactly 40 hexadecimal characters into an id.
    fn parseHexId(text: []const u8) !Id {
        if (text.len != 40) return error.InvalidObjectId;
        return std.fmt.parseUnsigned(Id, text, 16);
    }

    /// Looks `id` up in the packfile; returns null when there is no packfile
    /// or it does not contain the object. Caller owns `data` of the result.
    pub fn getObject(self: *Repo, id: Id) !?Object {
        if (self.packfile) |*packfile| {
            return packfile.getObject(id);
        }
        return null;
    }
};

/// Checkout the repository-backed tests below read from.
const testRepoPath = "../imgui/.git";

/// Opens the test repository, skipping the calling test when the checkout is
/// not present on this machine.
fn openTestRepo(alloc: Alloc) !Repo {
    return Repo.open(alloc, testRepoPath) catch |err| switch (err) {
        error.FileNotFound => return error.SkipZigTest,
        else => return err,
    };
}

test "pack primitives" {
    // Entry header: continuation bit, type 1, low nibble 5, then 10 << 4.
    const sizeBytes = [_]u8{ 0x95, 0x0a };
    var sizeStream = std.io.fixedBufferStream(&sizeBytes);
    const sizeReader = sizeStream.reader();
    const size = try PackFile.getSize(sizeReader.any(), true);
    try std.testing.expectEqual(@as(u64, 165), size.size);
    try std.testing.expectEqual(@as(u64, 2), size.bytelen);

    // ((1 + 1) << 7) | 0
    const offsetBytes = [_]u8{ 0x81, 0x00 };
    var offsetStream = std.io.fixedBufferStream(&offsetBytes);
    const offsetReader = offsetStream.reader();
    const offset = try PackFile.getOffset(offsetReader.any());
    try std.testing.expectEqual(@as(u64, 256), offset.offset);
    try std.testing.expectEqual(@as(u64, 2), offset.bytelen);

    // base size 11, result size 9, copy 6 bytes from offset 0, insert "zig".
    const delta = [_]u8{ 0x0b, 0x09, 0x90, 0x06, 0x03, 'z', 'i', 'g' };
    const patched = try PackFile.applyDelta(std.testing.allocator, "hello world", &delta);
    defer std.testing.allocator.free(patched);
    try std.testing.expectEqualStrings("hello zig", patched);

    // Copy of 5 bytes from offset 8 runs past the 11 byte base.
    const badDelta = [_]u8{ 0x0b, 0x05, 0x91, 0x08, 0x05 };
    try std.testing.expectError(
        error.InvalidDeltaFormat,
        PackFile.applyDelta(std.testing.allocator, "hello world", &badDelta),
    );
}

test "print HEAD" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();
    std.debug.print("HEAD: {}\n", .{head});
}

test "parse idx" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (repo.packfile) |packfile| {
        std.debug.print("{}\n", .{packfile.objectOffsets.keys().len});
        std.debug.print("{}\n", .{packfile.objectOffsets.values().len});
    }
}

test "get object" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();
    if (try repo.getObject(head)) |o| {
        defer alloc.free(o.data);
        std.debug.print("object({}): {s}\n", .{ o.kind, o.data });
    }
}

test "parse commit" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();
    if (try repo.getObject(head)) |o| {
        defer alloc.free(o.data);
        switch (try o.parse(alloc)) {
            .c => |c| {
                std.debug.print("commit:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ c.tree, c.parent, c.author, c.committer, c.message });
            },
            else => {},
        }
    }
}

test "get tree" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| {
        defer alloc.free(o.data);
        // std.debug.print("tree({}): {any}\n", .{ o.kind, o.data });
    }
}

test "parse tree" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| {
        defer alloc.free(o.data);
        switch (try o.parse(alloc)) {
            .t => |t| {
                defer t.deinit();
                for (t.items) |treeEntry| {
                    std.debug.print("{s} {s} {x}\n", .{ treeEntry.permissions, treeEntry.name, treeEntry.id });
                }
            },
            else => {},
        }
    }
}

test "list commits" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();
    var id = head;
    for (0..3) |_| {
        if (try repo.getObject(id)) |o| {
            defer alloc.free(o.data);
            switch (try o.parse(alloc)) {
                .c => |c| {
                    std.debug.print("commit {x}:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ id, c.tree, c.parent, c.author, c.committer, c.message });
                    id = c.parent;
                },
                else => {},
            }
        }
    }
}

test "list blobs" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| {
        defer alloc.free(o.data);
        switch (try o.parse(alloc)) {
            .t => |t| {
                defer t.deinit();
                for (t.items) |treeEntry| {
                    if (try repo.getObject(treeEntry.id)) |bo| {
                        defer alloc.free(bo.data);
                        if (treeEntry.permissions.len == 6) {
                            // Blobs may be shorter than the 50 byte preview.
                            const previewLen = @min(50, bo.data.len);
                            std.debug.print("{s}: [{x} {}]{s}\n", .{ treeEntry.name, treeEntry.id, bo.data.len, bo.data[0..previewLen] });
                        } else {
                            std.debug.print("[{s}]\n", .{treeEntry.name});
                        }
                    }
                }
            },
            else => {},
        }
    }
}

test "basic frontend" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();
    var id = head;
    for (0..3) |_| {
        if (try repo.getObject(id)) |o| {
            defer alloc.free(o.data);
            switch (try o.parse(alloc)) {
                .c => |c| {
                    std.debug.print("commit {x}:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ id, c.tree, c.parent, c.author, c.committer, c.message });
                    id = c.parent;
                },
                else => {},
            }
        }
    }
}