const std = @import("std");

/// A fully resolved (non-delta) object read from a pack file.
const GitObject = struct {
    /// Pack object type id as stored in the entry header. `main` treats 2 as
    /// a tree and 3 as a blob; delta entries (6 and 7) are resolved by
    /// `PackFile.getObject` and report the type of their base object.
    type: u3,
    /// Inflated object contents. Allocated with the `PackFile` allocator;
    /// the caller is responsible for freeing it.
    data: []const u8,
};

/// Decodes a little-endian base-128 size (7 payload bits per byte, high bit
/// set on every byte except the last).
///
/// When `ignoreTypeBits` is true the first byte is a pack entry header that
/// only contributes its low 4 bits to the size (the 3 bits above them hold the
/// object type); otherwise the first byte contributes 7 bits like the others.
///
/// Returns the decoded size and the number of bytes consumed.
/// The buffer is not length-checked: an empty buffer or one that ends while the
/// continuation bit is still set trips the index safety check.
fn getSize(buffer: []const u8, ignoreTypeBits: bool) struct { size: u64, bytelen: u64 } {
    const firstBits: u6 = if (ignoreTypeBits) 4 else 7;
    const firstMask: u8 = if (ignoreTypeBits) 0x0f else 0x7f;

    var size: u64 = buffer[0] & firstMask;
    var counter: u6 = 0;
    while ((buffer[counter] & 0x80) != 0) {
        counter += 1;
        const bits: u64 = buffer[counter] & 0x7f;
        // Byte `counter` sits above the first byte's bits and above the 7 bits
        // of each earlier continuation byte.
        size += bits << (7 * (counter - 1) + firstBits);
    }

    return .{
        .size = size,
        .bytelen = @as(u64, counter) + 1,
    };
}

/// Decodes the big-endian "offset encoding" used by ofs-delta entries: 7 bits
/// per byte, most significant group first, and for every byte but the last one
/// an extra 2^7, 2^14, ... is added to the value (equivalently: add one before
/// each shift).
///
/// Returns the decoded (positive) distance and the number of bytes consumed.
/// Like `getSize`, the buffer is not length-checked.
fn getOffset(buffer: []const u8) struct { offset: u64, bytelen: u64 } {
    var counter: u4 = 0;
    var offset: u64 = buffer[0] & 0x7f;
    while ((buffer[counter] & 0x80) != 0) {
        counter += 1;
        offset = ((offset + 1) << 7) | (buffer[counter] & 0x7f);
    }

    return .{
        .offset = offset,
        .bytelen = @as(u64, counter) + 1,
    };
}

/// Inflates the zlib stream at the start of `inBuffer` into a freshly
/// allocated buffer of exactly `size` bytes. Trailing bytes after the end of
/// the zlib stream are ignored.
///
/// Caller owns the returned slice. Fails with `error.SizeMismatch` when the
/// stream inflates to fewer than `size` bytes, and with the writer's error when
/// it inflates to more.
fn decompress(alloc: std.mem.Allocator, inBuffer: []const u8, size: usize) ![]u8 {
    const outBuffer = try alloc.alloc(u8, size);
    errdefer alloc.free(outBuffer);

    var inFbs = std.io.fixedBufferStream(inBuffer);
    var outFbs = std.io.fixedBufferStream(outBuffer);
    try std.compress.zlib.decompress(inFbs.reader(), outFbs.writer());

    // A short stream would otherwise leave an uninitialised tail in the result.
    if (outFbs.pos != outBuffer.len) return error.SizeMismatch;
    return outBuffer;
}

/// Applies an inflated delta (`deltaData`) to `baseData` and returns the
/// reconstructed object.
///
/// The delta starts with two sizes (base size, result size) in the `getSize`
/// encoding, followed by instructions:
///   * high bit set: copy from the base. Bits 0-3 select which offset bytes
///     follow, bits 4-6 which size bytes follow (both little-endian); a size
///     of 0 means 0x10000.
///   * high bit clear: insert the next `op` bytes of the delta verbatim;
///     `op == 0` is reserved and rejected.
///
/// Caller owns the returned slice and keeps ownership of both inputs.
/// Returns `error.DeltaBaseSizeMismatch` when the delta was made for a base of
/// a different length, and `error.InvalidDelta` for any malformed instruction
/// stream.
fn applyDelta(alloc: std.mem.Allocator, baseData: []const u8, deltaData: []const u8) ![]const u8 {
    if (deltaData.len == 0) return error.InvalidDelta;
    const baseObjectSize = getSize(deltaData, false);
    const baseHeaderLen: usize = @intCast(baseObjectSize.bytelen);
    if (baseHeaderLen >= deltaData.len) return error.InvalidDelta;
    const resultObjectSize = getSize(deltaData[baseHeaderLen..], false);
    const resultHeaderLen: usize = @intCast(resultObjectSize.bytelen);

    if (baseObjectSize.size != baseData.len) return error.DeltaBaseSizeMismatch;

    const resultLen = std.math.cast(usize, resultObjectSize.size) orelse return error.InvalidDelta;
    const result = try alloc.alloc(u8, resultLen);
    errdefer alloc.free(result);

    var pos: usize = baseHeaderLen + resultHeaderLen;
    var written: usize = 0;
    while (pos < deltaData.len) {
        const op = deltaData[pos];
        pos += 1;

        if ((op & 0x80) != 0) {
            // Copy instruction: gather the optional offset/size bytes.
            var copyOffset: u64 = 0;
            var copySize: u64 = 0;
            var bit: u3 = 0;
            while (bit < 7) : (bit += 1) {
                if (((op >> bit) & 1) == 0) continue;
                if (pos >= deltaData.len) return error.InvalidDelta;
                const value: u64 = deltaData[pos];
                pos += 1;
                if (bit < 4) {
                    copyOffset |= value << (@as(u6, bit) * 8);
                } else {
                    copySize |= value << (@as(u6, bit - 4) * 8);
                }
            }
            // The format defines an encoded size of zero as 0x10000 bytes.
            if (copySize == 0) copySize = 0x10000;

            const srcStart = std.math.cast(usize, copyOffset) orelse return error.InvalidDelta;
            const n = std.math.cast(usize, copySize) orelse return error.InvalidDelta;
            if (srcStart > baseData.len or n > baseData.len - srcStart) return error.InvalidDelta;
            if (n > result.len - written) return error.InvalidDelta;

            @memcpy(result[written..][0..n], baseData[srcStart..][0..n]);
            written += n;
        } else {
            // Insert instruction: the low 7 bits are the literal length.
            if (op == 0) return error.InvalidDelta;
            const n: usize = op;
            if (n > deltaData.len - pos or n > result.len - written) return error.InvalidDelta;

            @memcpy(result[written..][0..n], deltaData[pos..][0..n]);
            pos += n;
            written += n;
        }
    }

    if (written != result.len) return error.InvalidDelta;
    return result;
}

/// Read-only view over a Git pack (`.pack`) and its version-2 index (`.idx`).
///
/// Neither buffer is copied: both must outlive the `PackFile`.
const PackFile = struct {
    alloc: std.mem.Allocator,
    /// Index format version read from the idx header (always 2 after `init`).
    version: i32,
    /// Object names in index order, each 20-byte name read as a big-endian integer.
    objectNames: std.ArrayList(u160),
    /// 32-bit pack offsets, parallel to `objectNames`. Entries that refer to
    /// the idx 64-bit offset table are stored as-is and are not resolved.
    objectOffsets: std.ArrayList(u32),
    packBuffer: []const u8,
    idxBuffer: []const u8,

    /// Parses the name and offset tables out of `idxBuffer`.
    ///
    /// Returns `error.InvalidIndex` when the buffer is too short or lacks the
    /// v2 magic, and `error.UnsupportedIndexVersion` for any version but 2.
    /// On success the caller must call `deinit`.
    pub fn init(alloc: std.mem.Allocator, packBuffer: []const u8, idxBuffer: []const u8) !PackFile {
        const nameLen = 20;
        const crcLen = 4;
        const offsetLen = 4;
        // magic + version + 256-entry fanout table
        const namesStart = 4 + 4 + 4 * 256;

        if (idxBuffer.len < namesStart) return error.InvalidIndex;
        if (!std.mem.eql(u8, idxBuffer[0..4], "\xfftOc")) return error.InvalidIndex;
        const version = std.mem.readInt(i32, idxBuffer[4..8], .big);
        if (version != 2) return error.UnsupportedIndexVersion;

        // The last fanout entry counts the objects whose first name byte is
        // <= 0xff, i.e. every object in the pack.
        const numObjects: usize = std.mem.readInt(u32, idxBuffer[namesStart - 4 ..][0..4], .big);

        // Layout after the fanout: names, then CRC32s, then 32-bit offsets.
        const offsetsStart = namesStart + numObjects * (nameLen + crcLen);
        if (idxBuffer.len < offsetsStart + numObjects * offsetLen) return error.InvalidIndex;

        var objectNames = std.ArrayList(u160).init(alloc);
        errdefer objectNames.deinit();
        try objectNames.ensureTotalCapacityPrecise(numObjects);

        var objectOffsets = std.ArrayList(u32).init(alloc);
        errdefer objectOffsets.deinit();
        try objectOffsets.ensureTotalCapacityPrecise(numObjects);

        for (0..numObjects) |i| {
            const name = idxBuffer[namesStart + i * nameLen ..][0..nameLen];
            objectNames.appendAssumeCapacity(std.mem.readVarInt(u160, name, .big));

            const offset = idxBuffer[offsetsStart + i * offsetLen ..][0..offsetLen];
            objectOffsets.appendAssumeCapacity(std.mem.readInt(u32, offset, .big));
        }

        return .{
            .alloc = alloc,
            .version = version,
            .objectNames = objectNames,
            .objectOffsets = objectOffsets,
            .packBuffer = packBuffer,
            .idxBuffer = idxBuffer,
        };
    }

    /// Releases the name and offset tables. The pack and idx buffers are not
    /// owned and are left untouched.
    pub fn deinit(self: @This()) void {
        self.objectNames.deinit();
        self.objectOffsets.deinit();
    }

    /// Returns the pack offset recorded for `name`, or null when the index has
    /// no such object.
    pub fn findObjectOffset(self: @This(), name: u160) ?usize {
        for (self.objectNames.items, self.objectOffsets.items) |candidate, offset| {
            if (candidate == name) return offset;
        }
        return null;
    }

    /// Reads the entry that starts at byte `index` of the pack, following
    /// ofs-delta (type 6) and ref-delta (type 7) chains until a full object is
    /// reconstructed.
    ///
    /// The returned `data` is allocated with `self.alloc` and owned by the
    /// caller. The returned `type` is the type of the chain's base object.
    pub fn getObject(self: @This(), index: u64) !GitObject {
        const start = std.math.cast(usize, index) orelse return error.InvalidObjectOffset;
        if (start >= self.packBuffer.len) return error.InvalidObjectOffset;
        const entry = self.packBuffer[start..];

        const objectType: u3 = @truncate(entry[0] >> 4);
        const objectSize = getSize(entry, true);
        const headerLen: usize = @intCast(objectSize.bytelen);
        if (headerLen > entry.len) return error.TruncatedPack;
        const inflatedLen = std.math.cast(usize, objectSize.size) orelse return error.ObjectTooLarge;

        // Bytes after the entry header; delta entries strip their base
        // reference from the front so that it ends up pointing at the zlib
        // stream in every case.
        var payload = entry[headerLen..];

        const baseIndex: ?u64 = switch (objectType) {
            6 => blk: {
                // ofs-delta: base is `offset` bytes before this entry.
                if (payload.len == 0) return error.TruncatedPack;
                const offsetSize = getOffset(payload);
                // A zero distance would make the entry its own base.
                if (offsetSize.offset == 0 or offsetSize.offset > index) return error.InvalidDeltaOffset;
                const offsetLen: usize = @intCast(offsetSize.bytelen);
                if (offsetLen > payload.len) return error.TruncatedPack;
                payload = payload[offsetLen..];
                break :blk index - offsetSize.offset;
            },
            7 => blk: {
                // ref-delta: base is named by its 20-byte object name.
                if (payload.len < 20) return error.TruncatedPack;
                const baseObjectNameInt = std.mem.readVarInt(u160, payload[0..20], .big);
                payload = payload[20..];
                // Translate the name into a pack offset via the index.
                const baseOffset = self.findObjectOffset(baseObjectNameInt) orelse {
                    std.debug.print("object {x} not found\n", .{baseObjectNameInt});
                    return error.BaseObjectNotFound;
                };
                break :blk baseOffset;
            },
            else => null,
        };

        if (baseIndex) |base| {
            const baseObject = try self.getObject(base);
            defer self.alloc.free(baseObject.data);

            // For delta entries the header size is the inflated delta length.
            const deltaData = try decompress(self.alloc, payload, inflatedLen);
            defer self.alloc.free(deltaData);

            return .{
                .type = baseObject.type,
                .data = try applyDelta(self.alloc, baseObject.data, deltaData),
            };
        }

        return .{
            .type = objectType,
            .data = try decompress(self.alloc, payload, inflatedLen),
        };
    }
};

/// Prints the entries of a tree object to stderr, one per line, as
/// `<mode> <name> <40-digit hex name>`, followed by a blank line.
///
/// Each entry is `<mode> SP <name> NUL <20-byte object name>`; parsing stops
/// at the first entry that is incomplete.
fn printTree(data: []const u8) void {
    var rest = data;
    while (rest.len > 0) {
        const modeLen = std.mem.indexOfScalar(u8, rest, ' ') orelse break;
        const mode = rest[0..modeLen];
        rest = rest[modeLen + 1 ..];

        const nameLen = std.mem.indexOfScalar(u8, rest, 0) orelse rest.len;
        const name = rest[0..nameLen];
        rest = rest[@min(nameLen + 1, rest.len)..];

        if (rest.len < 20) break;
        const hash = std.mem.readVarInt(u160, rest[0..20], .big);
        rest = rest[20..];

        std.debug.print("{s} {s} {x:0>40}\n", .{ mode, name, hash });
    }
    std.debug.print("\n", .{});
}

/// Prints a short human-readable view of `o` to stderr: the first 1000 bytes
/// of type-3 objects, the entry list of type-2 objects, and just the type id
/// for anything else.
fn printObject(o: GitObject) void {
    switch (o.type) {
        3 => std.debug.print("object data: {s}\n\n", .{o.data[0..@min(o.data.len, 1000)]}),
        2 => printTree(o.data),
        else => std.debug.print("type: {}\n", .{o.type}),
    }
}

/// Loads a hard-coded pack/idx pair, then reads hexadecimal object names from
/// stdin (one per line) and prints each object that the index knows about.
/// Unknown names are skipped silently; the loop ends at end of input.
///
/// To dump every object instead, iterate `pf.objectOffsets.items`, call
/// `pf.getObject` on each offset, pass the result to `printObject` and free
/// its `data`.
pub fn main() !void {
    const packPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.pack";
    const idxPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.idx";

    var allocator = std.heap.GeneralPurposeAllocator(.{}){};
    const alloc = allocator.allocator();
    defer {
        const res = allocator.deinit();
        std.debug.print("{}\n", .{res});
    }

    // No artificial cap: the files are read whole, whatever their size.
    const maxBytes = std.math.maxInt(usize);
    const packBuffer = try std.fs.cwd().readFileAlloc(alloc, packPath, maxBytes);
    defer alloc.free(packBuffer);
    const idxBuffer = try std.fs.cwd().readFileAlloc(alloc, idxPath, maxBytes);
    defer alloc.free(idxBuffer);

    const pf = try PackFile.init(alloc, packBuffer, idxBuffer);
    defer pf.deinit();

    std.debug.print("{} objects\n", .{pf.objectNames.items.len});

    const stdin = std.io.getStdIn().reader();
    var inputBuffer: [1024]u8 = undefined;
    while (true) {
        const line = stdin.readUntilDelimiter(&inputBuffer, '\n') catch |err| switch (err) {
            error.EndOfStream => break,
            else => |e| return e,
        };
        const input = std.mem.trim(u8, line, " \t\r");
        const id = std.fmt.parseInt(u160, input, 16) catch {
            std.debug.print("invalid object id: {s}\n", .{input});
            continue;
        };

        const offset = pf.findObjectOffset(id) orelse continue;
        const o = try pf.getObject(offset);
        defer alloc.free(o.data);
        printObject(o);
    }
}