// Minimal reader for git pack files (`.pack` plus a version 2 `.idx`).
//
// Provenance: introduced as `main.zig` by commit
// 7800ec8afda9b3e6163a7ff1c372d450411b8ff1 (Patrick Schönberger,
// Thu, 25 Apr 2024 17:53:12 +0200, "finally add this to vc").

const std = @import("std");

/// A fully resolved object read from a pack file.
pub const GitObject = struct {
    /// Pack object type after delta resolution
    /// (1 = commit, 2 = tree, 3 = blob, 4 = tag).
    type: u3,
    /// Uncompressed object contents. Allocated with the allocator of the
    /// `PackFile` that produced it; the caller frees it.
    data: []const u8,
};

/// Decodes a little-endian base-128 size varint from the start of `buffer`.
///
/// When `ignoreTypeBits` is true the first byte is a pack entry header whose
/// bits 4..6 hold the object type, so only its low 4 bits contribute to the
/// size. Otherwise the first byte contributes its low 7 bits (delta headers).
///
/// Returns the decoded size and the number of bytes consumed. `buffer` must
/// contain a terminated varint; running off the end or exceeding 64 bits is
/// a safety-checked panic, not an error.
pub fn getSize(buffer: []const u8, ignoreTypeBits: bool) struct { size: u64, bytelen: u64 } {
    const firstMask: u8 = if (ignoreTypeBits) 0x0f else 0x7f;

    var size: u64 = buffer[0] & firstMask;
    var shift: usize = if (ignoreTypeBits) 4 else 7;
    var index: usize = 0;

    // The high bit of every byte says whether another byte follows.
    while (buffer[index] & 0x80 != 0) {
        index += 1;
        size |= @as(u64, buffer[index] & 0x7f) << @as(u6, @intCast(shift));
        shift += 7;
    }

    return .{
        .size = size,
        .bytelen = index + 1,
    };
}

/// Decodes the big-endian "offset encoding" varint that follows the header of
/// an ofs-delta entry (type 6).
///
/// Every continuation adds an implicit +1 before shifting, which is what makes
/// the encoding free of redundant representations.
///
/// Returns the distance from the delta entry back to its base entry and the
/// number of bytes consumed. `buffer` must contain a terminated varint.
pub fn getOffset(buffer: []const u8) struct { offset: u64, bytelen: u64 } {
    var index: usize = 0;
    var offset: u64 = buffer[0] & 0x7f;

    while (buffer[index] & 0x80 != 0) {
        index += 1;
        offset = ((offset + 1) << 7) | (buffer[index] & 0x7f);
    }

    return .{
        .offset = offset,
        .bytelen = index + 1,
    };
}

/// Inflates the zlib stream at the start of `inBuffer` into a newly allocated
/// buffer of exactly `size` bytes. Bytes after the end of the stream are
/// ignored, so the rest of the pack may be passed in.
///
/// Caller owns the returned slice.
/// Errors: allocation failure, any zlib error, `error.NoSpaceLeft` when the
/// stream inflates to more than `size` bytes and `error.SizeMismatch` when it
/// inflates to fewer.
pub fn decompress(alloc: std.mem.Allocator, inBuffer: []const u8, size: usize) ![]u8 {
    const outBuffer = try alloc.alloc(u8, size);
    errdefer alloc.free(outBuffer);

    var inFbs = std.io.fixedBufferStream(inBuffer);
    var outFbs = std.io.fixedBufferStream(outBuffer);

    try std.compress.zlib.decompress(inFbs.reader(), outFbs.writer());

    // A short stream would otherwise leave the tail of the buffer undefined.
    if (outFbs.getWritten().len != size) return error.SizeMismatch;

    return outBuffer;
}

/// Applies a git delta (`deltaData`, already inflated) to `baseData` and
/// returns the reconstructed object.
///
/// Delta layout: base size varint, result size varint, then instructions.
/// An instruction byte with the high bit set copies a range of the base
/// (bits 0..3 select offset bytes, bits 4..6 select size bytes, a size of 0
/// means 0x10000); any other non-zero byte N inserts the N literal bytes that
/// follow; 0 is reserved.
///
/// Neither input is freed or modified. Caller owns the returned slice.
/// Errors: allocation failure, or `error.InvalidDelta` when the delta is
/// truncated, does not match the base, or does not produce exactly the
/// announced number of bytes.
pub fn applyDelta(alloc: std.mem.Allocator, baseData: []const u8, deltaData: []const u8) ![]const u8 {
    if (deltaData.len == 0) return error.InvalidDelta;
    const baseObjectSize = getSize(deltaData, false);
    const resultHeaderStart: usize = @intCast(baseObjectSize.bytelen);
    if (resultHeaderStart >= deltaData.len) return error.InvalidDelta;
    const resultObjectSize = getSize(deltaData[resultHeaderStart..], false);

    if (baseObjectSize.size != baseData.len) return error.InvalidDelta;
    const resultLen = std.math.cast(usize, resultObjectSize.size) orelse return error.InvalidDelta;

    const result = try alloc.alloc(u8, resultLen);
    errdefer alloc.free(result);

    var written: usize = 0;
    var pos: usize = resultHeaderStart + @as(usize, @intCast(resultObjectSize.bytelen));

    while (pos < deltaData.len) {
        const cmd = deltaData[pos];
        pos += 1;

        if (cmd & 0x80 != 0) {
            // Copy from base: gather the little-endian offset/size bytes that
            // the low seven bits of the instruction announce.
            var copyOffset: u32 = 0;
            var copySize: u32 = 0;
            for (0..7) |bit| {
                if (cmd & (@as(u8, 1) << @as(u3, @intCast(bit))) == 0) continue;
                if (pos >= deltaData.len) return error.InvalidDelta;
                const value: u32 = deltaData[pos];
                pos += 1;
                if (bit < 4) {
                    copyOffset |= value << @as(u5, @intCast(8 * bit));
                } else {
                    copySize |= value << @as(u5, @intCast(8 * (bit - 4)));
                }
            }
            if (copySize == 0) copySize = 0x10000;

            const start: usize = copyOffset;
            const len: usize = copySize;
            if (len > baseData.len or start > baseData.len - len) return error.InvalidDelta;
            if (len > result.len - written) return error.InvalidDelta;

            @memcpy(result[written..][0..len], baseData[start..][0..len]);
            written += len;
        } else if (cmd != 0) {
            // Insert literal bytes taken from the delta itself.
            const len: usize = cmd;
            if (len > deltaData.len - pos) return error.InvalidDelta;
            if (len > result.len - written) return error.InvalidDelta;

            @memcpy(result[written..][0..len], deltaData[pos..][0..len]);
            written += len;
            pos += len;
        } else {
            // Instruction byte 0 is reserved by the format.
            return error.InvalidDelta;
        }
    }

    if (written != result.len) return error.InvalidDelta;
    return result;
}

/// In-memory view of one pack file and its version 2 index.
///
/// The struct borrows `packBuffer` and `idxBuffer`; both must outlive it.
pub const PackFile = struct {
    alloc: std.mem.Allocator,
    version: i32,
    /// Object names (SHA-1 as a big-endian integer) in index order.
    objectNames: std.ArrayList(u160),
    /// Byte offset into `packBuffer` of the entry for the name at the same
    /// position in `objectNames`.
    objectOffsets: std.ArrayList(u32),
    packBuffer: []const u8,
    idxBuffer: []const u8,
    // TODO: move `decompress` and `applyDelta` into this struct.

    const idxHeaderLen = 4 + 4; // magic + version
    const idxFanoutLen = 4 * 256;
    const idxNameLen = 20;
    const idxCrcLen = 4;
    const idxOffsetLen = 4;

    /// Parses the name and offset tables of a version 2 `.idx` file.
    ///
    /// Errors: allocation failure, `error.TruncatedIndex` when `idxBuffer` is
    /// too short for the tables it announces, `error.UnsupportedIndexVersion`
    /// when the magic or version is not that of a v2 index, and
    /// `error.UnsupportedLargeOffset` when an entry refers to the 64-bit
    /// offset table (packs over 2 GiB), which this reader does not decode.
    pub fn init(alloc: std.mem.Allocator, packBuffer: []const u8, idxBuffer: []const u8) !PackFile {
        if (idxBuffer.len < idxHeaderLen + idxFanoutLen) return error.TruncatedIndex;
        if (!std.mem.eql(u8, idxBuffer[0..4], "\xfftOc")) return error.UnsupportedIndexVersion;

        const version = std.mem.readInt(i32, idxBuffer[4..8], .big);
        if (version != 2) return error.UnsupportedIndexVersion;

        // The N-th fanout entry is the number of objects whose first name byte
        // is <= N, so the last entry is the total object count.
        const lastFanout = idxHeaderLen + idxFanoutLen - 4;
        const count: usize = std.mem.readInt(u32, idxBuffer[lastFanout..][0..4], .big);

        const namesStart: usize = idxHeaderLen + idxFanoutLen;
        const offsetsStart = namesStart + count * (idxNameLen + idxCrcLen);
        if (idxBuffer.len < offsetsStart + count * idxOffsetLen) return error.TruncatedIndex;

        var names = std.ArrayList(u160).init(alloc);
        errdefer names.deinit();
        try names.ensureTotalCapacityPrecise(count);

        var offsets = std.ArrayList(u32).init(alloc);
        errdefer offsets.deinit();
        try offsets.ensureTotalCapacityPrecise(count);

        for (0..count) |i| {
            const nameBytes = idxBuffer[namesStart + i * idxNameLen ..][0..idxNameLen];
            names.appendAssumeCapacity(std.mem.readVarInt(u160, nameBytes, .big));

            const rawOffset = std.mem.readInt(u32, idxBuffer[offsetsStart + i * idxOffsetLen ..][0..4], .big);
            // MSB set means "index into the 8-byte offset table", not an offset.
            if (rawOffset & 0x8000_0000 != 0) return error.UnsupportedLargeOffset;
            offsets.appendAssumeCapacity(rawOffset);
        }

        return .{
            .alloc = alloc,
            .version = version,
            .objectNames = names,
            .objectOffsets = offsets,
            .packBuffer = packBuffer,
            .idxBuffer = idxBuffer,
        };
    }

    /// Releases the name and offset tables. The borrowed buffers are untouched.
    pub fn deinit(self: @This()) void {
        self.objectNames.deinit();
        self.objectOffsets.deinit();
    }

    /// Returns the pack byte offset of the object called `name`, or null when
    /// the index does not contain it.
    pub fn findObjectOffset(self: @This(), name: u160) ?usize {
        for (self.objectNames.items, self.objectOffsets.items) |candidate, offset| {
            if (candidate == name) return offset;
        }
        return null;
    }

    /// Reads the object whose entry starts at byte `index` of the pack,
    /// resolving ofs-delta (type 6) and ref-delta (type 7) chains recursively.
    ///
    /// Caller owns `data` of the returned object and frees it with the
    /// allocator passed to `init`.
    /// Errors: allocation and zlib failures, `error.InvalidDelta`,
    /// `error.InvalidPackOffset`, `error.InvalidDeltaOffset`,
    /// `error.InvalidObjectType`, `error.ObjectTooLarge`,
    /// `error.TruncatedPack` and `error.BaseObjectNotFound`.
    pub fn getObject(self: @This(), index: u64) !GitObject {
        const entryStart = std.math.cast(usize, index) orelse return error.InvalidPackOffset;
        if (entryStart >= self.packBuffer.len) return error.InvalidPackOffset;

        const rawType: u3 = @truncate(self.packBuffer[entryStart] >> 4);
        const objectSize = getSize(self.packBuffer[entryStart..], true);
        const inflatedLen = std.math.cast(usize, objectSize.size) orelse return error.ObjectTooLarge;
        var payloadStart: usize = entryStart + @as(usize, @intCast(objectSize.bytelen));

        // Plain objects return directly; delta entries yield the pack offset
        // of their base and advance `payloadStart` past the base reference.
        const baseIndex: u64 = switch (rawType) {
            1, 2, 3, 4 => {
                return .{
                    .type = rawType,
                    .data = try decompress(self.alloc, self.packBuffer[payloadStart..], inflatedLen),
                };
            },
            6 => blk: {
                if (payloadStart >= self.packBuffer.len) return error.TruncatedPack;
                const relative = getOffset(self.packBuffer[payloadStart..]);
                payloadStart += @as(usize, @intCast(relative.bytelen));
                // The base always precedes the delta inside the same pack.
                if (relative.offset == 0 or relative.offset > index) return error.InvalidDeltaOffset;
                break :blk index - relative.offset;
            },
            7 => blk: {
                if (self.packBuffer.len - payloadStart < 20) return error.TruncatedPack;
                const baseObjectNameInt = std.mem.readVarInt(u160, self.packBuffer[payloadStart..][0..20], .big);
                payloadStart += 20;
                // The base is addressed by name; translate it to a pack offset
                // (not to its position in the name table).
                break :blk self.findObjectOffset(baseObjectNameInt) orelse {
                    std.debug.print("object {x} not found\n", .{baseObjectNameInt});
                    return error.BaseObjectNotFound;
                };
            },
            else => return error.InvalidObjectType, // 0 and 5 are not valid entry types
        };

        const baseObject = try self.getObject(baseIndex);
        defer self.alloc.free(baseObject.data);

        // For delta entries the header size is the inflated size of the delta.
        const deltaData = try decompress(self.alloc, self.packBuffer[payloadStart..], inflatedLen);
        defer self.alloc.free(deltaData);

        return .{
            .type = baseObject.type,
            .data = try applyDelta(self.alloc, baseObject.data, deltaData),
        };
    }
};

/// Prints the entries of a tree object as `<mode> <name> <hash>` lines
/// followed by a blank line. Stops at the first incomplete entry.
fn printTree(data: []const u8) void {
    var pos: usize = 0;
    while (pos < data.len) {
        const space = std.mem.indexOfScalarPos(u8, data, pos, ' ') orelse break;
        const nul = std.mem.indexOfScalarPos(u8, data, space + 1, 0) orelse break;
        const hashStart = nul + 1;
        if (data.len - hashStart < 20) break;

        const mode = data[pos..space];
        const name = data[space + 1 .. nul];
        const hash = std.mem.readVarInt(u160, data[hashStart..][0..20], .big);
        std.debug.print("{s} {s} {x:0>40}\n", .{ mode, name, hash });

        pos = hashStart + 20;
    }
    std.debug.print("\n", .{});
}

/// Interactive object viewer: reads hexadecimal object names from stdin, one
/// per line, and prints the matching object until end of input.
///
/// Usage: `main [<pack file> <idx file>]`. Without both arguments the
/// built-in default paths are used.
pub fn main() !void {
    const defaultPackPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.pack";
    const defaultIdxPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.idx";

    var allocator = std.heap.GeneralPurposeAllocator(.{}){};
    const alloc = allocator.allocator();
    defer {
        // Reports whether anything leaked.
        const res = allocator.deinit();
        std.debug.print("{}\n", .{res});
    }

    const args = try std.process.argsAlloc(alloc);
    defer std.process.argsFree(alloc, args);
    const packPath: []const u8 = if (args.len >= 3) args[1] else defaultPackPath;
    const idxPath: []const u8 = if (args.len >= 3) args[2] else defaultIdxPath;

    const packBuffer = try std.fs.cwd().readFileAlloc(alloc, packPath, std.math.maxInt(usize));
    defer alloc.free(packBuffer);
    const idxBuffer = try std.fs.cwd().readFileAlloc(alloc, idxPath, std.math.maxInt(usize));
    defer alloc.free(idxBuffer);

    const print = std.debug.print;

    const pf = try PackFile.init(alloc, packBuffer, idxBuffer);
    defer pf.deinit();

    print("{} objects\n", .{pf.objectNames.items.len});

    const r = std.io.getStdIn().reader();
    var inputBuffer: [1024]u8 = undefined;

    while (try r.readUntilDelimiterOrEof(&inputBuffer, '\n')) |line| {
        const input = std.mem.trim(u8, line, " \t\r");
        if (input.len == 0) continue;

        const id = std.fmt.parseInt(u160, input, 16) catch {
            print("invalid object id: {s}\n", .{input});
            continue;
        };
        const offset = pf.findObjectOffset(id) orelse {
            print("object {x:0>40} not found\n", .{id});
            continue;
        };
        const o = pf.getObject(offset) catch |err| {
            print("failed to read object: {s}\n", .{@errorName(err)});
            continue;
        };
        defer alloc.free(o.data);

        switch (o.type) {
            3 => print("object data: {s}\n\n", .{o.data[0..@min(o.data.len, 1000)]}),
            2 => printTree(o.data),
            else => print("type: {}\n", .{o.type}),
        }
    }
}