//! Minimal read-only access to a git repository: resolves HEAD and reads
//! commits, trees and blobs out of a version-2 packfile.
//!
//! Limitations visible in this file: only objects stored in a single packfile
//! are found (no loose objects), refs are read from their own file only (no
//! packed-refs), ref-delta objects and annotated tags are rejected.

const std = @import("std");
const Alloc = std.mem.Allocator;
const Reader = std.io.AnyReader;
const Writer = std.io.AnyWriter;

const MaxFileSize = 1024 * 1024;

/// Object id (SHA-1) held as a 160-bit integer, most significant byte first.
const Id = u160;

/// A parsed commit. All slices point into the `Object.data` it was parsed from.
const Commit = struct {
    tree: Id,
    /// First parent, or 0 when the commit has no parent line.
    parent: Id,
    /// The whole header line, including the leading "author ".
    author: []u8,
    /// The whole header line, including the leading "committer ".
    committer: []u8,
    /// Everything after the committer line (starts with the separating blank line).
    message: []u8,
};

/// One entry of a tree object. Slices point into the owning `Object.data`.
const TreeEntry = struct {
    permissions: []u8,
    name: []u8,
    id: Id,
};
const Tree = std.ArrayList(TreeEntry);

const Blob = struct {
    data: []u8,
};

const ParsedObject = union(enum) {
    c: Commit,
    t: Tree,
    b: Blob,
};

/// A raw (already inflated / de-deltified) git object.
const Object = struct {
    /// Pack object type as handled by `parse`: 1 commit, 2 tree, 3 blob, 4 tag.
    kind: u3,
    data: []u8,

    pub fn init(kind: u3, data: []u8) Object {
        return .{
            .kind = kind,
            .data = data,
        };
    }

    /// Interprets `data` according to `kind`.
    ///
    /// `alloc` is only used for the entry list of a tree; the caller must
    /// `deinit` a returned `.t`. Every slice in the result borrows from
    /// `self.data`, which must therefore outlive the parsed value.
    ///
    /// Errors: `InvalidCommitFormat`, `InvalidTreeFormat`, `TagNotImplemented`,
    /// `UnknownGitObjectType`, integer-parsing errors for malformed ids, and
    /// `OutOfMemory`.
    pub fn parse(self: Object, alloc: Alloc) !ParsedObject {
        switch (self.kind) {
            1 => return ParsedObject{ .c = try parseCommit(self.data) },
            2 => return ParsedObject{ .t = try parseTree(alloc, self.data) },
            3 => return ParsedObject{ .b = .{ .data = self.data } },
            4 => return error.TagNotImplemented,
            else => return error.UnknownGitObjectType,
        }
    }

    /// Parses the `tree`, optional first `parent`, `author` and `committer` headers.
    fn parseCommit(data: []u8) !Commit {
        const idHexLen = 40;
        const treePrefix = "tree ";
        const parentPrefix = "\nparent ";
        const treeEnd = treePrefix.len + idHexLen;

        if (data.len < treeEnd or !std.mem.startsWith(u8, data, treePrefix))
            return error.InvalidCommitFormat;
        const tree = try std.fmt.parseUnsigned(Id, data[treePrefix.len..treeEnd], 16);

        // A root commit has no parent line at all, so it cannot be read from a
        // fixed offset. Only the first parent of a merge is reported.
        var parent: Id = 0;
        const afterTree = data[treeEnd..];
        if (std.mem.startsWith(u8, afterTree, parentPrefix)) {
            if (afterTree.len < parentPrefix.len + idHexLen)
                return error.InvalidCommitFormat;
            parent = try std.fmt.parseUnsigned(
                Id,
                afterTree[parentPrefix.len .. parentPrefix.len + idHexLen],
                16,
            );
        }

        // Anchor the searches on line starts and search forward only, so that a
        // name containing "committer " cannot be mistaken for the header.
        const authorOffset = 1 + (std.mem.indexOfPos(u8, data, treeEnd, "\nauthor ") orelse
            return error.InvalidCommitFormat);
        const authorNewline = std.mem.indexOfScalarPos(u8, data, authorOffset, '\n') orelse
            return error.InvalidCommitFormat;
        const committerOffset = 1 + (std.mem.indexOfPos(u8, data, authorNewline, "\ncommitter ") orelse
            return error.InvalidCommitFormat);
        const committerNewline = std.mem.indexOfScalarPos(u8, data, committerOffset, '\n') orelse
            return error.InvalidCommitFormat;

        return .{
            .tree = tree,
            .parent = parent,
            .author = data[authorOffset..authorNewline],
            .committer = data[committerOffset..committerNewline],
            .message = data[committerNewline + 1 ..],
        };
    }

    /// Parses the `<mode> <name>\0<20-byte id>` records of a tree object.
    fn parseTree(alloc: Alloc, data: []u8) !Tree {
        const rawIdLen = 20;

        var t = Tree.init(alloc);
        errdefer t.deinit();

        var offset: usize = 0;
        // `offset + 1 < len` is the original `offset < len - 1` without the
        // unsigned underflow on empty data.
        while (offset + 1 < data.len) {
            const spaceOffset = std.mem.indexOfScalarPos(u8, data, offset, ' ') orelse
                return error.InvalidTreeFormat;
            const zeroOffset = std.mem.indexOfScalarPos(u8, data, spaceOffset, 0) orelse
                return error.InvalidTreeFormat;
            const idStart = zeroOffset + 1;
            const idEnd = idStart + rawIdLen;
            if (idEnd > data.len) return error.InvalidTreeFormat;

            try t.append(.{
                .permissions = data[offset..spaceOffset],
                .name = data[spaceOffset + 1 .. zeroOffset],
                .id = std.mem.readVarInt(Id, data[idStart..idEnd], .big),
            });

            offset = idEnd;
        }

        return t;
    }
};

/// Inflates one zlib stream from `r`. Caller owns the returned slice.
fn decompress(alloc: Alloc, r: Reader) ![]u8 {
    var buffer = std.ArrayList(u8).init(alloc);
    errdefer buffer.deinit();

    try std.compress.zlib.decompress(r, buffer.writer().any());

    return buffer.toOwnedSlice();
}

/// One `.idx` / `.pack` pair with the id -> pack offset table held in memory.
const PackFile = struct {
    alloc: Alloc,
    idxFile: std.fs.File,
    pckFile: std.fs.File,
    objectOffsets: std.AutoArrayHashMap(Id, u32),

    /// Opens the first `*.idx` found in `<dir>/objects/pack` together with its
    /// `.pack` file and loads the index.
    ///
    /// Returns null when the directory does not exist or contains no index.
    /// Only one pack is used: the index and the pack handle must belong
    /// together, and this struct has room for a single pair.
    pub fn open(alloc: Alloc, dir: std.fs.Dir) !?PackFile {
        var packDir = dir.openDir("objects/pack", .{ .iterate = true }) catch |err| switch (err) {
            error.FileNotFound => return null,
            else => return err,
        };
        defer packDir.close();

        var packIt = packDir.iterate();
        while (try packIt.next()) |f| {
            if (!std.mem.endsWith(u8, f.name, ".idx")) continue;

            const idxFilename = f.name;
            var pckFilename = try std.BoundedArray(u8, std.fs.max_path_bytes).init(0);
            try std.fmt.format(
                pckFilename.writer(),
                "{s}.pack",
                .{idxFilename[0 .. idxFilename.len - 4]},
            );

            const idxFile = try packDir.openFile(idxFilename, .{});
            errdefer idxFile.close();
            const pckFile = try packDir.openFile(pckFilename.constSlice(), .{});
            errdefer pckFile.close();

            var self = PackFile{
                .alloc = alloc,
                .idxFile = idxFile,
                .pckFile = pckFile,
                .objectOffsets = std.AutoArrayHashMap(Id, u32).init(alloc),
            };
            errdefer self.objectOffsets.deinit();

            try self.parseIndex();

            return self;
        }

        return null;
    }

    pub fn close(self: *PackFile) void {
        self.objectOffsets.deinit();
        self.idxFile.close();
        self.pckFile.close();
    }

    /// (Re)loads `objectOffsets` from `idxFile`, which must be a version-2 index.
    ///
    /// Layout used: 8-byte header, 256 x u32 fan-out table, N x 20-byte ids,
    /// N x u32 CRCs (skipped), N x u32 pack offsets.
    pub fn parseIndex(self: *PackFile) !void {
        const headerSize = 4 + 4;
        const fanoutSize = 4 * 256;
        const idTableStart = headerSize + fanoutSize;

        const idxReader = self.idxFile.reader().any();

        try self.idxFile.seekTo(0);
        var header: [headerSize]u8 = undefined;
        try idxReader.readNoEof(&header);
        if (!std.mem.eql(u8, header[0..4], "\xfftOc") or
            std.mem.readInt(u32, header[4..8], .big) != 2)
        {
            return error.UnsupportedPackIndex;
        }

        // Fan-out entry i counts the objects whose first id byte is <= i, so the
        // last entry is the total number of objects.
        try self.idxFile.seekTo(headerSize + 255 * 4);
        const numObjects: usize = try idxReader.readVarInt(u32, .big, 4);

        self.objectOffsets.clearRetainingCapacity();

        try self.idxFile.seekTo(idTableStart);
        for (0..numObjects) |_| {
            const id = try idxReader.readVarInt(Id, .big, 20);
            try self.objectOffsets.put(id, 0);
        }
        // The offsets below are matched to ids by insertion order; a duplicate
        // id would silently shift every following pairing.
        if (self.objectOffsets.count() != numObjects) return error.InvalidPackIndex;

        try self.idxFile.seekTo(idTableStart + numObjects * (20 + 4));
        for (self.objectOffsets.values()) |*offset| {
            offset.* = try idxReader.readVarInt(u32, .big, 4);
        }
    }

    /// Reads a little-endian base-128 size.
    ///
    /// With `ignoreTypeBits` the first byte contributes only its low 4 bits
    /// (pack object header); otherwise every byte contributes 7 bits (delta
    /// header). `bytelen` is the number of bytes consumed.
    fn getSize(reader: Reader, ignoreTypeBits: bool) !struct { size: u64, bytelen: u64 } {
        var byte = try reader.readByte();

        var size: u64 = if (ignoreTypeBits) byte & 0x0f else byte & 0x7f;
        var shift: usize = if (ignoreTypeBits) 4 else 7;
        var nBytes: u64 = 1;

        while (byte & 0b10000000 != 0) {
            // Another 7-bit group would not fit into 64 bits.
            if (shift > 64 - 7) return error.VarIntTooLong;

            byte = try reader.readByte();
            size |= @as(u64, byte & 0x7f) << @intCast(shift);
            shift += 7;
            nBytes += 1;
        }

        return .{
            .size = size,
            .bytelen = nBytes,
        };
    }

    /// Reads the big-endian, offset-biased base-128 distance of an ofs-delta
    /// entry. For n >= 2 bytes the value is the concatenated 7-bit groups plus
    /// 2^7 + 2^14 + ... + 2^(7*(n-1)), which the `+ 1` per continuation adds.
    fn getOffset(reader: Reader) !struct { offset: u64, bytelen: u64 } {
        var byte = try reader.readByte();

        var offset: u64 = byte & 0x7f;
        var nBytes: u64 = 1;

        while (byte & 0b10000000 != 0) {
            if (nBytes >= 9) return error.VarIntTooLong;

            byte = try reader.readByte();
            offset = ((offset + 1) << 7) | (byte & 0x7f);
            nBytes += 1;
        }

        return .{
            .offset = offset,
            .bytelen = nBytes,
        };
    }

    /// Rebuilds an object from `baseData` and the inflated delta `deltData`.
    ///
    /// Caller owns the returned slice. Returns `error.InvalidDelta` when the
    /// delta does not match the base, reads or writes out of range, uses the
    /// reserved instruction 0, or does not produce exactly the announced size.
    fn applyDelta(alloc: Alloc, baseData: []const u8, deltData: []const u8) ![]u8 {
        var fbs = std.io.fixedBufferStream(deltData);
        const deltDataReader = fbs.reader().any();
        const baseObjectSize = try getSize(deltDataReader, false);
        const resultObjectSize = try getSize(deltDataReader, false);

        if (baseObjectSize.size != baseData.len) return error.InvalidDelta;
        const resultLen = std.math.cast(usize, resultObjectSize.size) orelse
            return error.InvalidDelta;

        const result = try alloc.alloc(u8, resultLen);
        errdefer alloc.free(result);

        var pos: usize = @intCast(baseObjectSize.bytelen + resultObjectSize.bytelen);
        var written: usize = 0;

        while (pos < deltData.len) {
            const instruction = deltData[pos];
            pos += 1;

            if (instruction & 0b10000000 != 0) {
                // Copy from base: bits 0-3 select which offset bytes follow,
                // bits 4-6 which size bytes follow (least significant first).
                var dataOffset: usize = 0;
                var dataSize: usize = 0;
                for (0..7) |i| {
                    const mask = @as(u8, 1) << @as(u3, @intCast(i));
                    if (instruction & mask == 0) continue;

                    if (pos >= deltData.len) return error.InvalidDelta;
                    const operand: usize = deltData[pos];
                    pos += 1;

                    if (i < 4) {
                        dataOffset |= operand << @intCast(8 * i);
                    } else {
                        dataSize |= operand << @intCast(8 * (i - 4));
                    }
                }

                // An encoded size of zero means 0x10000 bytes.
                if (dataSize == 0)
                    dataSize = 0x10000;

                if (dataOffset > baseData.len or dataSize > baseData.len - dataOffset)
                    return error.InvalidDelta;
                if (dataSize > result.len - written)
                    return error.InvalidDelta;

                @memcpy(
                    result[written .. written + dataSize],
                    baseData[dataOffset .. dataOffset + dataSize],
                );
                written += dataSize;
            } else {
                // Insert the next `instruction` literal bytes from the delta.
                const dataSize: usize = instruction;
                if (dataSize == 0) return error.InvalidDelta;
                if (dataSize > deltData.len - pos or dataSize > result.len - written)
                    return error.InvalidDelta;

                @memcpy(
                    result[written .. written + dataSize],
                    deltData[pos .. pos + dataSize],
                );
                written += dataSize;
                pos += dataSize;
            }
        }

        if (written != result.len) return error.InvalidDelta;

        return result;
    }

    /// Resolves an ofs-delta entry.
    ///
    /// Expects the pack file position to be at the start of the compressed
    /// delta; `offset` is the distance from there back to the base object's
    /// header. The result takes the base object's kind; its data is owned by
    /// the caller.
    fn ofsDelta(self: *PackFile, offset: i64) anyerror!Object {
        const pckReader = self.pckFile.reader().any();

        const pos = try self.pckFile.getPos();

        try self.pckFile.seekBy(-offset);
        const baseObject = try self.readObject(pckReader);
        defer self.alloc.free(baseObject.data);

        try self.pckFile.seekTo(pos);
        const deltaData = try decompress(self.alloc, pckReader);
        defer self.alloc.free(deltaData);

        const objectData = try applyDelta(self.alloc, baseObject.data, deltaData);
        return Object.init(baseObject.kind, objectData);
    }

    /// Reads the object whose header starts at the current pack file position.
    ///
    /// `reader` must be an unbuffered reader over `pckFile`, because the header
    /// byte is re-read after seeking the file back by one.
    fn readObject(self: *PackFile, reader: Reader) anyerror!Object {
        const firstByte = try reader.readByte();
        const objectKind: u3 = @truncate(firstByte >> 4);
        try self.pckFile.seekBy(-1);
        const objectSize = try getSize(reader, true);

        switch (objectKind) {
            6 => {
                const offset = try getOffset(reader);

                return try self.ofsDelta(
                    @intCast(offset.offset + objectSize.bytelen + offset.bytelen),
                );
            },
            // A ref-delta starts with a 20-byte base id, not a zlib stream.
            7 => return error.RefDeltaNotImplemented,
            else => {
                const objectData = try decompress(self.alloc, reader);
                return Object.init(objectKind, objectData);
            },
        }
    }

    /// Looks `id` up in the index and reads it from the pack.
    /// Returns null when the id is not in this pack; the caller owns `data`.
    pub fn getObject(self: *PackFile, id: Id) !?Object {
        const offset = self.objectOffsets.get(id) orelse return null;

        // With the high bit set the value is an index into the 64-bit offset
        // table, which parseIndex does not load.
        if (offset & 0x8000_0000 != 0) return error.PackOffsetTooLarge;

        const pckReader = self.pckFile.reader().any();
        try self.pckFile.seekTo(offset);

        return try self.readObject(pckReader);
    }
};

/// An opened `.git` directory.
const Repo = struct {
    alloc: Alloc,
    dir: std.fs.Dir,
    packfile: ?PackFile,

    /// Opens the git directory at `path` (relative to the cwd) and its packfile,
    /// if there is one. Release with `close`.
    pub fn open(alloc: Alloc, path: []const u8) !Repo {
        var dir = try std.fs.cwd().openDir(path, .{});
        errdefer dir.close();

        const packfile = try PackFile.open(alloc, dir);

        return .{
            .alloc = alloc,
            .dir = dir,
            .packfile = packfile,
        };
    }

    pub fn close(self: *Repo) void {
        if (self.packfile) |*packfile| {
            packfile.close();
        }
        self.dir.close();
    }

    /// Returns the commit id HEAD resolves to.
    ///
    /// Handles both a symbolic HEAD ("ref: <path>", one level, read from the
    /// ref's own file) and a detached HEAD (a bare hex id).
    pub fn getHead(self: *Repo) !Id {
        const whitespace = " \t\r\n";
        const refPrefix = "ref: ";

        // read file HEAD
        const headFile = try self.dir.readFileAlloc(self.alloc, "HEAD", 1024);
        defer self.alloc.free(headFile);
        const head = std.mem.trim(u8, headFile, whitespace);

        if (!std.mem.startsWith(u8, head, refPrefix)) {
            // detached HEAD: the file holds the id itself
            return try std.fmt.parseUnsigned(Id, head, 16);
        }

        // read file pointed at by HEAD
        const headPath = head[refPrefix.len..];
        var idBuffer: [40]u8 = undefined;
        const idStr = try self.dir.readFile(headPath, &idBuffer);

        // parse id from file
        return try std.fmt.parseUnsigned(Id, std.mem.trim(u8, idStr, whitespace), 16);
    }

    /// Returns the object `id`, or null when it is not in the packfile.
    pub fn getObject(self: *Repo, id: Id) !?Object {
        if (self.packfile) |*packfile| {
            return packfile.getObject(id);
        }
        return null;
    }
};

const testRepoPath = "../imgui/.git";
const testTreeId: Id = 0xceb2b2c62d6f8f3686dcacecd5be931839b02c77;

/// Opens the fixture repository, skipping the calling test when it is absent.
fn openTestRepo(alloc: Alloc) !Repo {
    return Repo.open(alloc, testRepoPath) catch |err| switch (err) {
        error.FileNotFound => return error.SkipZigTest,
        else => return err,
    };
}

/// Prints up to `count` commits following first parents from `start`.
fn printCommitChain(repo: *Repo, alloc: Alloc, start: Id, count: usize) !void {
    var id = start;

    for (0..count) |_| {
        if (try repo.getObject(id)) |o| {
            switch (try o.parse(alloc)) {
                .c => |c| {
                    std.debug.print("commit {x}:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ id, c.tree, c.parent, c.author, c.committer, c.message });
                    id = c.parent;
                },
                else => {},
            }
        }
    }
}

test "print HEAD" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();

    std.debug.print("HEAD: {}\n", .{head});
}

test "parse idx" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (repo.packfile) |packfile| {
        std.debug.print("{}\n", .{packfile.objectOffsets.keys().len});
        std.debug.print("{}\n", .{packfile.objectOffsets.values().len});
    }
}

test "get object" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();

    if (try repo.getObject(head)) |o| {
        std.debug.print("object({}): {s}\n", .{ o.kind, o.data });
    }
}

test "parse commit" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();

    if (try repo.getObject(head)) |o| {
        switch (try o.parse(alloc)) {
            .c => |c| {
                std.debug.print("commit:\n tree: {x}\n parent: {x}\n author: {s}\n committer: {s}\n message: {s}\n", .{ c.tree, c.parent, c.author, c.committer, c.message });
            },
            else => {},
        }
    }
}

test "get tree" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(testTreeId)) |o| {
        std.debug.print("tree({}): {any}\n", .{ o.kind, o.data });
    }
}

test "parse tree" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(testTreeId)) |o| {
        switch (try o.parse(alloc)) {
            .t => |t| {
                defer t.deinit();
                for (t.items) |treeEntry| {
                    std.debug.print("{s} {s} {x}\n", .{ treeEntry.permissions, treeEntry.name, treeEntry.id });
                }
            },
            else => {},
        }
    }
}

test "list commits" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();

    try printCommitChain(&repo, alloc, head, 3);
}

test "list blobs" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    if (try repo.getObject(testTreeId)) |o| {
        switch (try o.parse(alloc)) {
            .t => |t| {
                defer t.deinit();
                for (t.items) |treeEntry| {
                    if (try repo.getObject(treeEntry.id)) |bo| {
                        if (treeEntry.permissions.len == 6) {
                            // Blobs may be shorter than the 50-byte preview.
                            const preview = bo.data[0..@min(50, bo.data.len)];
                            std.debug.print("{s}: [{x} {}]{s}\n", .{ treeEntry.name, treeEntry.id, bo.data.len, preview });
                        } else {
                            std.debug.print("[{s}]\n", .{treeEntry.name});
                        }
                    }
                }
            },
            else => {},
        }
    }
}

test "basic frontend" {
    var arena = std.heap.ArenaAllocator.init(std.testing.allocator);
    defer arena.deinit();
    const alloc = arena.allocator();

    var repo = try openTestRepo(alloc);
    defer repo.close();

    const head = try repo.getHead();

    try printCommitChain(&repo, alloc, head, 3);
}