X-Git-Url: https://gitweb.ps.run/ziggit/blobdiff_plain/24dac1661a5e54263608f6d5b9140bf5379074d3..8bb933882251b1c52c42f95af32c163149005396:/git.zig diff --git a/git.zig b/git.zig index fe82870..80e9aac 100644 --- a/git.zig +++ b/git.zig @@ -1,32 +1,123 @@ const std = @import("std"); const Alloc = std.mem.Allocator; +const Reader = std.io.AnyReader; +const Writer = std.io.AnyWriter; + +const MaxFileSize = 1024 * 1024; const Id = u160; const Commit = struct { - author: std.BoundedArray(u8, 64), - message: std.BoundedArray(u8, 1024), - parent: Id, tree: Id, + parent: Id, + author: []u8, + committer: []u8, + message: []u8, }; +const TreeEntry = struct { + permissions: []u8, + name: []u8, + id: Id, +}; +const Tree = std.ArrayList(TreeEntry); const Blob = struct { - data: std.BoundedArray(u8, 1024), + data: []u8, +}; +const ParsedObject = union(enum) { + c: Commit, + t: Tree, + b: Blob, }; const Object = struct { - alloc: Alloc, + kind: u3, + data: []u8, + + pub fn init(kind: u3, data: []u8) Object { + return .{ + .kind = kind, + .data = data, + }; + } + pub fn parse(self: Object, alloc: Alloc) !ParsedObject { + switch (self.kind) { + 1 => { + const authorOffset = std.mem.indexOf(u8, self.data, "author ") orelse return error.InvalidCommitFormat; + const authorNewline = std.mem.indexOfScalarPos(u8, self.data, authorOffset, '\n') orelse return error.InvalidCommitFormat; + const committerOffset = std.mem.indexOf(u8, self.data, "committer ") orelse return error.InvalidCommitFormat; + const committerNewline = std.mem.indexOfScalarPos(u8, self.data, committerOffset, '\n') orelse return error.InvalidCommitFormat; + + return .{ + .c = Commit{ + .tree = try std.fmt.parseUnsigned(Id, self.data[5..45], 16), + .parent = try std.fmt.parseUnsigned(Id, self.data[53..93], 16), + .author = self.data[authorOffset..authorNewline], + .committer = self.data[committerOffset..committerNewline], + .message = self.data[committerNewline + 1 .. self.data.len], + }, + }; + }, + 2 => { + var t = Tree.init(alloc); + var offset: usize = 0; + + while (offset < self.data.len - 1) { + const spaceOffset = std.mem.indexOfScalarPos(u8, self.data, offset, ' ') orelse return error.InvalidTreeFormat; + const zeroOffset = std.mem.indexOfScalarPos(u8, self.data, spaceOffset, 0) orelse return error.InvalidTreeFormat; + + try t.append(.{ + .permissions = self.data[offset..spaceOffset], + .name = self.data[spaceOffset + 1 .. zeroOffset], + .id = std.mem.readVarInt(Id, self.data[zeroOffset + 1 .. zeroOffset + 21], .big), + }); + + offset = zeroOffset + 21; + } + + return .{ .t = t }; + }, + 3 => { + return .{ + .b = Blob{ .data = self.data }, + }; + }, + 4 => { + return error.TagNotImplemented; + }, + else => return error.UnknownGitObjectType, + } + } // pub fn getCommit(self: *Object) Commit {} // pub fn getBlob(self: *Object) Blob {} }; + +fn decompress(alloc: Alloc, r: Reader) ![]u8 { + var buffer = std.ArrayList(u8).init(alloc); + + try std.compress.zlib.decompress(r, buffer.writer().any()); + + return alloc.realloc(buffer.allocatedSlice(), buffer.items.len); +} + const PackFile = struct { alloc: Alloc, + idxFile: std.fs.File, + pckFile: std.fs.File, + objectOffsets: std.AutoArrayHashMap(Id, u32), - pub fn open(alloc: Alloc, dir: std.fs.Dir) !PackFile { - var self = PackFile{ .alloc = alloc }; + pub fn open(alloc: Alloc, dir: std.fs.Dir) !?PackFile { + var self = PackFile{ + .alloc = alloc, + .idxFile = undefined, + .pckFile = undefined, + .objectOffsets = std.AutoArrayHashMap(Id, u32).init(alloc), + }; var packDir = try dir.openDir("objects/pack", .{ .iterate = true }); defer packDir.close(); + var packFileFound = false; + var packIt = packDir.iterate(); while (try packIt.next()) |f| { if (std.mem.endsWith(u8, f.name, ".idx")) { @@ -38,37 +129,244 @@ const PackFile = struct { .{idxFilename[0 .. idxFilename.len - 4]}, ); - const idxFile = try packDir.openFile(idxFilename, .{}); - const pckFile = try packDir.openFile(pckFilename.constSlice(), .{}); - defer idxFile.close(); - defer pckFile.close(); + self.idxFile = try packDir.openFile(idxFilename, .{}); + self.pckFile = try packDir.openFile(pckFilename.constSlice(), .{}); - const idxReader = idxFile.reader().any(); - const pckReader = pckFile.reader().any(); + try self.parseIndex(); - try self.parse(idxReader, pckReader); + packFileFound = true; } } + if (!packFileFound) + return null; + return self; } - pub fn parse(self: *PackFile, idxReader: std.io.AnyReader, pckReader: std.io.AnyReader) !void { - _ = self; - var buffer: [16]u8 = undefined; - _ = try idxReader.read(&buffer); - std.debug.print("{s}\n", .{&buffer}); - _ = try pckReader.read(&buffer); - std.debug.print("{s}\n", .{&buffer}); + + pub fn close(self: *PackFile) void { + self.objectOffsets.deinit(); + self.idxFile.close(); + self.pckFile.close(); } - // pub fn init(alloc: Alloc, path: []const u8) PackFile {} - // pub fn deinit(self: *PackFile) void {} - // pub fn getObject(self: *PackFile, id: Id) Object {} + pub fn parseIndex(self: *PackFile) !void { + const idxReader = self.idxFile.reader().any(); + + var fanoutTable: [256]u32 = undefined; + + for (0..256) |i| { + try self.idxFile.seekTo(8 + i * 4); + fanoutTable[i] = try idxReader.readVarInt(u32, .big, 4); + + const numObjects = + if (i > 0) fanoutTable[i] - fanoutTable[i - 1] else fanoutTable[i]; + + for (0..numObjects) |j| { + const idOffset = + 4 + 4 + 4 * 256 + (j + if (i > 0) fanoutTable[i - 1] else 0) * 20; + try self.idxFile.seekTo(idOffset); + const id = try idxReader.readVarInt(Id, .big, 20); + + try self.objectOffsets.put(id, 0); + } + } + + const numObjects = self.objectOffsets.keys().len; + for (0..numObjects) |i| { + const offsetOffset = + 4 + 4 + 4 * 256 + numObjects * (20 + 4) + i * 4; + try self.idxFile.seekTo(offsetOffset); + const offset = try idxReader.readVarInt(u32, .big, 4); + + self.objectOffsets.values()[i] = offset; + } + } + + fn getSize(reader: Reader, ignoreTypeBits: bool) !struct { size: u64, bytelen: u64 } { + var size: u64 = 0; + var counter: u6 = 0; + while (true) { + const byte = try reader.readByte(); + + if (counter == 0) { + if (ignoreTypeBits) { + const bits: u4 = @truncate(byte); + size = bits; + } else { + const bits: u7 = @truncate(byte); + size = bits; + } + } else { + if (ignoreTypeBits) { + const bits: u7 = @truncate(byte); + size += @as(u64, bits) << (7 * (counter - 1) + 4); + } else { + const bits: u7 = @truncate(byte); + size += @as(u64, bits) << (7 * (counter)); + } + } + + if (byte & 0b10000000 == 0) { + break; + } + + counter += 1; + } + + const nBytes = counter + 1; + + return .{ + .size = size, + .bytelen = nBytes, + }; + } + + fn getOffset(reader: Reader) !struct { offset: u64, bytelen: u64 } { + var offset: u64 = 0; + var counter: u4 = 0; + while (true) { + const byte = try reader.readByte(); + + const bits: u7 = @truncate(byte); + offset <<= 7; + offset += @as(u64, bits); + + if (byte & 0b10000000 == 0) { + break; + } + + counter += 1; + } + + const nBytes = counter + 1; + + if (nBytes >= 2) { + for (1..nBytes) |i| { + offset += std.math.pow(u64, 2, 7 * i); + } + } + return .{ + .offset = offset, + .bytelen = nBytes, + }; + } + + fn applyDelta(alloc: Alloc, baseData: []const u8, deltData: []const u8) ![]u8 { + var fbs = std.io.fixedBufferStream(deltData); + const deltDataReader = fbs.reader().any(); + const baseObjectSize = try getSize(deltDataReader, false); + const resultObjectSize = try getSize(deltDataReader, false); + const deltaDataOffset = baseObjectSize.bytelen + resultObjectSize.bytelen; + + const result = try alloc.alloc(u8, resultObjectSize.size); + var resultCounter: u64 = 0; + + var counter: u64 = 0; + while (true) { + const b = deltData[deltaDataOffset + counter]; + + if (b & 0b10000000 != 0) { + var dataOffset: u64 = 0; + var dataSize: u64 = 0; + var bitsSet: u8 = 0; + for (0..4) |i| { // offset bits + if (b & (@as(u64, 1) << @min(3, i)) != 0) { + dataOffset += @as(u64, deltData[deltaDataOffset + counter + 1 + bitsSet]) << @min(3 * 8, i * 8); + bitsSet += 1; + } + } + for (4..7) |i| { // size bits + if (b & (@as(u64, 1) << @min(6, i)) != 0) { + dataSize += @as(u64, deltData[deltaDataOffset + counter + 1 + bitsSet]) << @min(6 * 8, (i - 4) * 8); + bitsSet += 1; + } + } + counter += bitsSet; + + if (dataSize == 0) + dataSize = 0x10000; + + std.mem.copyForwards( + u8, + result[resultCounter..result.len], + baseData[dataOffset .. dataOffset + dataSize], + ); + + resultCounter += dataSize; + } else { + const dataSize: u7 = @truncate(b); + + std.mem.copyForwards( + u8, + result[resultCounter..result.len], + deltData[deltaDataOffset + counter + 1 .. deltaDataOffset + counter + 1 + dataSize], + ); + + resultCounter += dataSize; + counter += dataSize; + } + + counter += 1; + if (deltaDataOffset + counter >= deltData.len) + break; + } + + return result; + } + + fn ofsDelta(self: *PackFile, offset: i64) anyerror!Object { + const pckReader = self.pckFile.reader().any(); + + const pos = try self.pckFile.getPos(); + + try self.pckFile.seekBy(-offset); + const baseObject = try self.readObject(pckReader); + defer self.alloc.free(baseObject.data); + + try self.pckFile.seekTo(pos); + const deltaData = try decompress(self.alloc, pckReader); + defer self.alloc.free(deltaData); + + const objectData = try applyDelta(self.alloc, baseObject.data, deltaData); + return Object.init(baseObject.kind, objectData); + } + + fn readObject(self: *PackFile, reader: Reader) anyerror!Object { + const firstByte = try reader.readByte(); + const objectKind: u3 = @truncate(firstByte >> 4); + try self.pckFile.seekBy(-1); + const objectSize = try getSize(reader, true); + + if (objectKind == 6) { + const offset = try getOffset(reader); + + return try self.ofsDelta( + @intCast(offset.offset + objectSize.bytelen + offset.bytelen), + ); + } else { + const objectData = try decompress(self.alloc, reader); + return Object.init(objectKind, objectData); + } + } + + pub fn getObject(self: *PackFile, id: Id) !?Object { + if (self.objectOffsets.get(id)) |offset| { + const pckReader = self.pckFile.reader().any(); + try self.pckFile.seekTo(offset); + + const o = try self.readObject(pckReader); + + return o; + } + return null; + } }; + const Repo = struct { alloc: Alloc, dir: std.fs.Dir, - packfile: PackFile, + packfile: ?PackFile, pub fn open(alloc: Alloc, path: []const u8) !Repo { const dir = try std.fs.cwd().openDir(path, .{}); @@ -81,9 +379,14 @@ const Repo = struct { .packfile = packfile, }; } + pub fn close(self: *Repo) void { self.dir.close(); + if (self.packfile != null) { + self.packfile.?.close(); + } } + pub fn getHead(self: *Repo) !Id { // read file HEAD const head = try self.dir.readFileAlloc(self.alloc, "HEAD", 1024); @@ -97,11 +400,21 @@ const Repo = struct { // parse id from file return try std.fmt.parseUnsigned(u160, idStr, 16); } - // pub fn getObject(self: *Repo, id: Id) Object {} + + pub fn getObject(self: *Repo, id: Id) !?Object { + if (self.packfile) |*packfile| { + return packfile.getObject(id); + } + return null; + } }; test "print HEAD" { - var repo = try Repo.open(std.testing.allocator, "../microwindows/.git"); + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); defer repo.close(); const head = try repo.getHead(); @@ -109,41 +422,178 @@ test "print HEAD" { std.debug.print("HEAD: {}\n", .{head}); } -// test "list commits" { -// var repo = Repo.open(std.testing.allocator, "../microwindows/.git"); -// defer repo.close(); +test "parse idx" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + if (repo.packfile) |packfile| { + std.debug.print("{}\n", .{packfile.objectOffsets.keys().len}); + std.debug.print("{}\n", .{packfile.objectOffsets.values().len}); + } +} + +test "get object" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + const head = try repo.getHead(); + + if (try repo.getObject(head)) |o| { + defer alloc.free(o.data); + + std.debug.print("object({}): {s}\n", .{ o.kind, o.data }); + } +} + +test "parse commit" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + const head = try repo.getHead(); + + if (try repo.getObject(head)) |o| { + defer alloc.free(o.data); + + switch (try o.parse(alloc)) { + .c => |c| { + std.debug.print("commit:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ c.tree, c.parent, c.author, c.committer, c.message }); + }, + else => {}, + } + } +} + +test "get tree" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); -// const head = repo.getObject(repo.head); -// defer head.deinit(); + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); -// var c = head.getCommit(); -// for (0..3) |_| { -// std.debug.print("{}\n", .{c}); -// c = c.parent; -// } -// } + if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| { + defer alloc.free(o.data); -// test "tree" { -// var repo = Repo.open(std.testing.allocator, "../microwindows/.git"); -// defer repo.close(); + // std.debug.print("tree({}): {any}\n", .{ o.kind, o.data }); + } +} -// const head = repo.getObject(repo.head); -// defer head.deinit(); +test "parse tree" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); -// const commit = head.getCommit(); + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); -// std.debug.print("{}\n", .{commit.tree}); -// } + if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| { + defer alloc.free(o.data); -// test "blob" { -// var repo = Repo.open(std.testing.allocator, "../microwindows/.git"); -// defer repo.close(); + switch (try o.parse(alloc)) { + .t => |t| { + defer t.deinit(); + for (t.items) |treeEntry| { + std.debug.print("{s} {s} {x}\n", .{ treeEntry.permissions, treeEntry.name, treeEntry.id }); + } + }, + else => {}, + } + } +} -// const head = repo.getObject(repo.head); -// defer head.deinit(); +test "list commits" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); -// const commit = head.getCommit(); -// const blob = repo.getBlob(commit.files[0].id); + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + const head = try repo.getHead(); + + var id = head; + + for (0..3) |_| { + if (try repo.getObject(id)) |o| { + defer alloc.free(o.data); + + switch (try o.parse(alloc)) { + .c => |c| { + std.debug.print("commit {x}:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ id, c.tree, c.parent, c.author, c.committer, c.message }); + id = c.parent; + }, + else => {}, + } + } + } +} -// std.debug.print("{}\n", .{blob}); -// } +test "list blobs" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + if (try repo.getObject(0xceb2b2c62d6f8f3686dcacecd5be931839b02c77)) |o| { + defer alloc.free(o.data); + + switch (try o.parse(alloc)) { + .t => |t| { + defer t.deinit(); + for (t.items) |treeEntry| { + if (try repo.getObject(treeEntry.id)) |bo| { + defer alloc.free(bo.data); + + if (treeEntry.permissions.len == 6) { + std.debug.print("{s}: [{x} {}]{s}\n", .{ treeEntry.name, treeEntry.id, bo.data.len, bo.data[0..50] }); + } else { + std.debug.print("[{s}]\n", .{treeEntry.name}); + } + } + } + }, + else => {}, + } + } +} + +test "basic frontend" { + var arena = std.heap.ArenaAllocator.init(std.testing.allocator); + defer arena.deinit(); + const alloc = arena.allocator(); + + var repo = try Repo.open(alloc, "../imgui/.git"); + defer repo.close(); + + const head = try repo.getHead(); + + var id = head; + + for (0..3) |_| { + if (try repo.getObject(id)) |o| { + defer alloc.free(o.data); + + switch (try o.parse(alloc)) { + .c => |c| { + std.debug.print("commit {x}:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ id, c.tree, c.parent, c.author, c.committer, c.message }); + id = c.parent; + }, + else => {}, + } + } + } +}