1 const std = @import("std");
// Value returned by PackFile.getObject. The field list is not visible in this
// excerpt; code in this file reads `.type` and `.data` from it, and `.data` is
// released with the allocator by whoever received the object.
3 const GitObject = struct {
// Decodes a variable-length size field located at the start of `buffer`.
// Returns the decoded value (`size`) together with a byte count (`bytelen`);
// callers in this file use `bytelen` to skip past the field.
// `ignoreTypeBits` is passed as true for pack object headers (getObject) and as
// false for the two size fields at the start of delta data (applyDelta).
// NOTE(review): the loop header, the branch conditions and the return statement
// are not visible in this excerpt, so the per-line notes below are hedged.
// NOTE(review): no bounds check on `buffer[counter]` is visible here.
8 fn getSize(buffer: []const u8, ignoreTypeBits: bool) struct { size: u64, bytelen: u64 } {
// Keeps only the low 4 bits of the byte - presumably the first byte when
// ignoreTypeBits is set (getObject reads the object type from `>> 4` of it).
14 const bits: u4 = @truncate(buffer[counter]);
// Low 7 bits of the byte; the high bit is tested separately further down.
17 const bits: u7 = @truncate(buffer[counter]);
22 const bits: u7 = @truncate(buffer[counter]);
// Shift used after a 4-bit first byte: 4 bits already consumed, plus 7 for
// every earlier continuation byte.
23 size += @as(u64, bits) << (7 * (counter - 1) + 4);
25 const bits: u7 = @truncate(buffer[counter]);
// Plain case: each byte contributes 7 bits, least-significant group first.
26 size += @as(u64, bits) << (7 * (counter));
// High bit clear - presumably marks the last byte of the field (the body of
// this branch is not visible here).
30 if (buffer[counter] & 0b10000000 == 0) {
// Number of bytes consumed; presumably what is returned as `bytelen` - confirm.
37 const nBytes = counter + 1;
// Decodes a variable-length offset located at the start of `buffer`.
// Returns the decoded value (`offset`) and a byte count (`bytelen`).
// getObject uses it for type-6 objects: the result is subtracted from the
// object's own position to find its base, and `bytelen` is skipped before the
// compressed delta data.
// NOTE(review): the loop headers and the return statement are not visible in
// this excerpt; no bounds check on `buffer[counter]` is visible either.
45 fn getOffset(buffer: []const u8) struct { offset: u64, bytelen: u64 } {
// Low 7 bits of the current byte are the payload.
49 const bits: u7 = @truncate(buffer[counter]);
// Adds the 7-bit group to the accumulator (how the accumulator is shifted
// between bytes is on a line not visible here).
51 offset += @as(u64, bits);
// High bit clear - presumably the last byte of the field.
53 if (buffer[counter] & 0b10000000 == 0) {
// Number of bytes consumed; presumably returned as `bytelen` - confirm.
60 const nBytes = counter + 1;
// Adds 2^(7*i) for each `i` of a loop whose header is not visible; presumably a
// bias applied once per extra byte of the encoding - confirm the range of `i`.
64 offset += std.math.pow(u64, 2, 7 * i);
// Inflates the zlib stream at the start of `inBuffer` into a freshly allocated
// buffer of exactly `size` bytes.
// The buffer is freed again if an error occurs after the allocation; the return
// statement is not visible here, but presumably the buffer is returned and the
// caller owns it.
// Errors: allocation failure, or any error reported by the zlib decompressor or
// by the fixed-size output writer.
73 fn decompress(alloc: std.mem.Allocator, inBuffer: []const u8, size: usize) ![]u8 {
74 const outBuffer = try alloc.alloc(u8, size);
75 errdefer alloc.free(outBuffer);
// Wrap the input slice so it can be consumed through a reader.
77 var inFbs = std.io.fixedBufferStream(inBuffer);
78 const reader = inFbs.reader();
// Wrap the output buffer so the decompressor can write into it; the stream
// cannot grow beyond `size` bytes.
// NOTE(review): if the stream inflates to fewer than `size` bytes the tail of
// the buffer presumably stays uninitialised - confirm callers pass exact sizes.
80 var outFbs = std.io.fixedBufferStream(outBuffer);
81 const writer = outFbs.writer();
83 try std.compress.zlib.decompress(reader, writer);
// Builds a new object by applying the instructions in `deltaData` to `baseData`.
// `deltaData` starts with two variable-length sizes (base size, then result
// size) followed by a sequence of instructions. The result buffer is allocated
// from `alloc` with the decoded result size.
// Ownership: this function frees `deltaData` with `alloc` before it finishes
// (see the last visible line), so the caller must have allocated it with the
// same allocator and must not free it again. `baseData` is only read.
// NOTE(review): the loop header, the else branch header and the return are not
// visible in this excerpt.
// NOTE(review): no check that the decoded base size equals `baseData.len`, and
// no bounds checks on the copy ranges, are visible here.
88 fn applyDelta(alloc: std.mem.Allocator, baseData: []const u8, deltaData: []const u8) ![]const u8 {
// First header field: size of the base object.
89 const baseObjectSize = getSize(deltaData, false);
// Second header field, starting right after the first one: size of the result.
90 const resultObjectSize = getSize(deltaData[baseObjectSize.bytelen..deltaData.len], false);
// Position of the first instruction byte within `deltaData`.
91 const deltaDataOffset = baseObjectSize.bytelen + resultObjectSize.bytelen;
93 // std.debug.print("base: {}, result: {}\n", .{ baseObjectSize.size, resultObjectSize.size });
95 const result = try alloc.alloc(u8, resultObjectSize.size);
// Write position inside `result`.
96 var resultCounter: u64 = 0;
// Current instruction byte.
100 const b = deltaData[deltaDataOffset + counter];
// High bit set: copy a range out of the base object.
102 if (b & 0b10000000 != 0) {
103 // if (b == 0b10010000) {
106 var dataOffset: u64 = 0;
107 var dataSize: u64 = 0;
// Bits 0..3 of `b` say which of the four offset bytes follow the instruction;
// byte i contributes bits [8*i, 8*i+8) of the offset. `bitsSet` (declared on
// a line not visible here) presumably counts the operand bytes read so far.
// The @min calls do not change the value for the loop range; presumably they
// are there to give the shift amount a narrow enough integer type - confirm.
109 for (0..4) |i| { // offset bits
110 if (b & (@as(u64, 1) << @min(3, i)) != 0) {
111 dataOffset += @as(u64, deltaData[deltaDataOffset + counter + 1 + bitsSet]) << @min(3 * 8, i * 8);
// Bits 4..6 of `b` do the same for up to three size bytes.
// NOTE(review): Git's pack format documents a copy size of 0 as meaning
// 0x10000; whether that is handled is not visible in this excerpt - verify.
115 for (4..7) |i| { // size bits
116 if (b & (@as(u64, 1) << @min(6, i)) != 0) {
117 dataSize += @as(u64, deltaData[deltaDataOffset + counter + 1 + bitsSet]) << @min(6 * 8, (i - 4) * 8);
123 // std.debug.print("copying {} bytes of from {} data[{b:0>8}]: {s}\n", .{ dataSize, dataOffset, b, baseData[dataOffset .. dataOffset + dataSize] });
// Copy the selected base range to the current write position.
125 std.mem.copyForwards(
127 result[resultCounter..result.len],
128 baseData[dataOffset .. dataOffset + dataSize],
131 resultCounter += dataSize;
// Otherwise (branch header not visible): the low 7 bits of `b` are the number
// of literal bytes that follow the instruction byte.
134 const dataSize: u7 = @truncate(b);
135 // std.debug.print("pasting {} bytes: {s}\n", .{ dataSize, deltaData[deltaDataOffset + counter + 1 .. deltaDataOffset + counter + 1 + dataSize] });
136 std.mem.copyForwards(
138 result[resultCounter..result.len],
139 deltaData[deltaDataOffset + counter + 1 .. deltaDataOffset + counter + 1 + dataSize],
141 resultCounter += dataSize;
// End-of-instructions test (the action taken is not visible; presumably it
// leaves the loop).
146 if (deltaDataOffset + counter >= deltaData.len)
// Releases the caller-supplied delta buffer.
// NOTE(review): freeing a `[]const u8` parameter is a surprising contract for
// callers; consider letting the caller free it with `defer` instead.
150 alloc.free(deltaData);
// In-memory view of one pack file together with its index.
// The pack and index buffers are borrowed from the caller; only the two lists
// built by `init` are owned by this struct and released by `deinit`.
// NOTE(review): some lines of this struct (for example the declaration of the
// `version` field assigned in `init`) are not visible in this excerpt.
154 const PackFile = struct {
// Allocator used for the lists below and for object data produced by getObject.
155 alloc: std.mem.Allocator,
// Object names as 160-bit integers, in the order they appear in the index.
157 objectNames: std.ArrayList(u160),
// Pack offset of each object; entry i belongs to objectNames entry i.
158 objectOffsets: std.ArrayList(u32),
// Raw bytes of the pack (borrowed).
159 packBuffer: []const u8,
// Raw bytes of the index (borrowed).
160 idxBuffer: []const u8,
// Parses `idxBuffer` into the name and offset lists.
// Layout read by the visible code: a big-endian 32-bit version at bytes 4..8,
// a 256-entry table of big-endian u32 at byte 8, then 20-byte object names,
// then (after a further 4 bytes per object) one big-endian u32 offset per
// object.
// Errors: allocation failure while appending to the lists.
// NOTE(review): bytes 0..4 and the buffer length are not validated in the
// visible lines, and no errdefer for the lists is visible - if an `append`
// fails the partially built lists are presumably leaked.
165 pub fn init(alloc: std.mem.Allocator, packBuffer: []const u8, idxBuffer: []const u8) !PackFile {
166 var result: PackFile = undefined;
167 result.alloc = alloc;
168 result.packBuffer = packBuffer;
169 result.idxBuffer = idxBuffer;
171 result.version = std.mem.readInt(i32, idxBuffer[4..8], .big);
173 // N-th entry of this table records the number of objects in the corresponding pack,
174 // the first byte of whose object name is less than or equal to N.
175 var fanoutTable: [256]u32 = undefined;
176 result.objectNames = std.ArrayList(u160).init(alloc);
177 // result.crc32s = std.ArrayList(u32).init(alloc);
178 result.objectOffsets = std.ArrayList(u32).init(alloc);
// Entry i of the table (the loop header over `i` is not visible here).
181 fanoutTable[i] = std.mem.readVarInt(u32, idxBuffer[8 + i * 4 .. 8 + i * 4 + 4], .big);
// The table is cumulative, so the count for this first byte is the difference
// to the previous entry.
182 const numObjects = if (i > 0) fanoutTable[i] - fanoutTable[i - 1] else fanoutTable[i];
183 // print("{} objects starting with {x:02}\n", .{ numObjects, i });
185 for (0..numObjects) |j| {
// Names start after the 8 header bytes and the 256 * 4 table bytes; skip the
// names that belong to smaller first bytes, 20 bytes each.
186 const nameOffset = 4 + 4 + 4 * 256 + (j + if (i > 0) fanoutTable[i - 1] else 0) * 20;
187 const objectName = idxBuffer[nameOffset .. nameOffset + 20];
188 const objectNameInt = std.mem.readVarInt(u160, objectName, .big);
189 // print("object name: {x}\n", .{objectNameInt});
190 try result.objectNames.append(objectNameInt);
194 for (0..result.objectNames.items.len) |i| {
// Offsets start after all 20-byte names plus another 4 bytes per object (the
// commented-out `crc32s` list above suggests those are CRC32 values - confirm).
195 const offsetOffset = 4 + 4 + 4 * 256 + result.objectNames.items.len * (20 + 4) + i * 4;
196 const offsetInt = std.mem.readVarInt(u32, idxBuffer[offsetOffset .. offsetOffset + 4], .big);
198 try result.objectOffsets.append(offsetInt);
// Releases the two lists. The pack and index buffers are not freed here.
204 pub fn deinit(self: @This()) void {
205 self.objectNames.deinit();
206 self.objectOffsets.deinit();
// Linear search for `name`; on a match returns the pack offset stored at the
// same position. Presumably returns null when nothing matches (that line is
// not visible here).
209 pub fn findObjectOffset(self: @This(), name: u160) ?usize {
210 for (0..self.objectNames.items.len) |i| {
211 if (self.objectNames.items[i] == name)
212 return self.objectOffsets.items[i];
// Reads the object whose header starts at byte `index` of the pack buffer.
// Types 6 and 7 are treated as deltas: the base object is loaded recursively,
// the delta is inflated and applied, and the base object's type is adopted.
// Any other type has its data inflated directly.
// The return statement is not visible here; callers free `.data` of the result
// with the allocator.
217 pub fn getObject(self: @This(), index: u64) !GitObject {
// Object type: bits 4..6 of the first header byte.
218 var objectType: u3 = @truncate(self.packBuffer[index] >> 4);
// Size field from the same header; `bytelen` is the header length.
219 const objectSize = getSize(self.packBuffer[index..self.packBuffer.len], true);
221 // std.debug.print("getting object at index {}. type: {}, size: {}.\n", .{
227 var objectData: []const u8 = undefined;
// Type 6: the base is identified by a distance stored after the header.
229 if (objectType == 6) {
230 const offsetSize = getOffset(
231 self.packBuffer[index + objectSize.bytelen .. self.packBuffer.len],
// The base lies `offset` bytes before this object.
234 const baseIndex = index - offsetSize.offset;
236 const baseObject = try self.getObject(baseIndex);
237 defer self.alloc.free(baseObject.data);
// Compressed delta starts after the header and the offset bytes.
238 const deltaData = try decompress(
240 self.packBuffer[index + objectSize.bytelen + offsetSize.bytelen .. self.packBuffer.len],
// applyDelta frees `deltaData` itself, so it is not freed here.
243 objectData = try applyDelta(self.alloc, baseObject.data, deltaData);
244 objectType = baseObject.type;
// Type 7: the base is identified by its 20-byte name stored after the header.
245 } else if (objectType == 7) {
246 const baseObjectName = self.packBuffer[index + objectSize.bytelen .. index + objectSize.bytelen + 20];
247 const baseObjectNameInt = std.mem.readVarInt(u160, baseObjectName, .big);
248 var baseObjectOffset: ?u64 = null;
249 for (0..self.objectNames.items.len) |i| {
250 if (baseObjectNameInt == self.objectNames.items[i]) {
// NOTE(review): this stores the position in `objectNames`, but the value is
// later passed to getObject, which indexes `packBuffer` with it.
// findObjectOffset returns `objectOffsets.items[i]` for the same kind of
// match - this looks like it should do the same; confirm and fix.
251 baseObjectOffset = i;
255 if (baseObjectOffset) |offset| {
256 const baseObject = try self.getObject(offset);
257 defer self.alloc.free(baseObject.data);
// Compressed delta starts after the header and the 20-byte base name.
258 const deltaData = try decompress(
260 self.packBuffer[index + objectSize.bytelen + 20 .. self.packBuffer.len],
263 objectData = try applyDelta(self.alloc, baseObject.data, deltaData);
264 objectType = baseObject.type;
// Base not found in this index. What happens after the message is not visible
// here. NOTE(review): make sure `objectData` is not returned while undefined.
266 std.debug.print("object {x} not found\n", .{baseObjectNameInt});
// Non-delta object: the compressed data follows the header directly.
269 objectData = try decompress(
271 self.packBuffer[index + objectSize.bytelen .. self.packBuffer.len],
276 // const objectStart = objectSize.bytelen;
277 // objectData = self.packBuffer[objectStart .. objectStart + objectSize];
// Interactive entry point: loads one pack/index pair, reports how many objects
// it contains, then reads hexadecimal object ids from stdin and prints the
// matching objects.
// NOTE(review): several lines (loop header, some branch headers, closing
// braces) are not visible in this excerpt.
286 pub fn main() !void {
// Hard-coded locations of the pack and its index, relative to the working directory.
287 const packPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.pack";
288 const idxPath = "../microwindows/.git/objects/pack/pack-a2e25318e6fc668e1264fdaa11fb7223d5627143.idx";
// Passed to readFileAlloc below as the size limit for each file.
// NOTE(review): these look like the exact sizes of the two files above; any
// other pack would need new values - consider a generous limit instead.
290 const packBytes = 35920363;
291 const idxBytes = 392036;
293 var allocator = std.heap.GeneralPurposeAllocator(.{}){};
294 const alloc = allocator.allocator();
// Prints the result of the allocator's deinit (the enclosing statement,
// presumably a `defer`, is not visible here).
296 const res = allocator.deinit();
297 std.debug.print("{}\n", .{res});
// NOTE(review): both reads run before either `defer` below is registered, so if
// the second read fails `packBuffer` is not freed. Placing each `defer`
// directly after its read would avoid that.
300 const packBuffer = try std.fs.cwd().readFileAlloc(alloc, packPath, packBytes);
301 const idxBuffer = try std.fs.cwd().readFileAlloc(alloc, idxPath, idxBytes);
302 defer alloc.free(packBuffer);
303 defer alloc.free(idxBuffer);
305 const print = std.debug.print;
307 const pf = try PackFile.init(alloc, packBuffer, idxBuffer);
310 print("{} objects\n", .{pf.objectNames.items.len});
312 const r = std.io.getStdIn().reader();
313 var inputBuffer = std.mem.zeroes([1024]u8);
// One line of input, without the trailing newline.
316 const input = try r.readUntilDelimiter(&inputBuffer, '\n');
// The line is parsed as a hexadecimal object id.
// NOTE(review): a malformed line makes `try` return from main with an error;
// confirm that ending the program on bad input is intended.
318 const id = try std.fmt.parseInt(u160, input, 16);
320 if (pf.findObjectOffset(id)) |offset| {
321 const o = try pf.getObject(offset);
// First branch (its condition is not visible; the commented-out copy below
// tests `o.type == 3`): print at most the first 1000 bytes of the data.
324 std.debug.print("object data: {s}\n\n", .{o.data[0..@min(o.data.len, 1000)]});
// Type 2: the data is walked as a sequence of entries of the form
// "<mode> <name>\0<20-byte hash>".
325 } else if (o.type == 2) {
326 var counter: u64 = 0;
327 while (counter < o.data.len) {
// Mode: everything up to the next space; stop if there is none.
328 const modeLen = std.mem.indexOfScalar(u8, o.data[counter..o.data.len], ' ') orelse break;
329 const mode = o.data[counter .. counter + modeLen];
330 counter += modeLen + 1;
// Name: everything up to the next zero byte (or the end of the data).
332 var nameLen: u64 = 0;
333 while (counter + nameLen < o.data.len and o.data[counter + nameLen] != 0) {
336 const name = o.data[counter .. counter + nameLen];
337 counter += nameLen + 1;
// Hash: the following 20 bytes, if that many remain.
339 if (counter + 20 <= o.data.len) {
340 const hash: u160 = std.mem.readVarInt(u160, o.data[counter .. counter + 20], .big);
343 std.debug.print("{s} {s} {x:0>40}\n", .{ mode, name, hash });
// Remaining case (branch header not visible): only the type number is printed.
348 std.debug.print("type: {}\n", .{o.type});
// Disabled variant of the code above that walks every object in the index
// instead of reading ids from stdin.
353 // for (0..pf.objectNames.items.len) |i| {
354 // print("object {x:0>40}:\n", .{pf.objectNames.items[i]});
355 // const o = try pf.getObject(pf.objectOffsets.items[i]);
356 // defer alloc.free(o.data);
358 // if (o.type == 3) {
359 // std.debug.print("object data: {s}\n\n", .{o.data[0..@min(o.data.len, 1000)]});
360 // } else if (o.type == 2) {
361 // var counter: u64 = 0;
362 // while (counter < o.data.len) {
363 // const modeLen = std.mem.indexOfScalar(u8, o.data[counter..o.data.len], ' ') orelse break;
364 // const mode = o.data[counter .. counter + modeLen];
365 // counter += modeLen + 1;
367 // var nameLen: u64 = 0;
368 // while (counter + nameLen < o.data.len and o.data[counter + nameLen] != 0) {
371 // const name = o.data[counter .. counter + nameLen];
372 // counter += nameLen + 1;
374 // if (counter + 20 <= o.data.len) {
375 // const hash: u160 = std.mem.readVarInt(u160, o.data[counter .. counter + 20], .big);
378 // std.debug.print("{s} {s} {x:0>40}\n", .{ mode, name, hash });
383 // std.debug.print("type: {}\n", .{o.type});
386 // if (o.type == 4) {
387 // std.debug.print("type: {}\ndata: {s}\n\n", .{ o.type, o.data });