Module:Hash

From The Satanic Wiki
Jump to navigation Jump to search

Documentation for this module may be created at Module:Hash/doc

-- Imports

local luts = mw.loadData('Module:Hash/LUTs')

-- Utils: bit manipulation

-- Adds two numbers and keeps only the low 32 bits of the sum.
local function u32_add(a, b)
    local sum = a + b
    -- Anything carried past bit 31 is dropped by the modulo
    return sum % 0x100000000
end

-- Bitwise AND built from plain arithmetic: both operands are peeled apart one
-- binary digit at a time, starting from the least significant bit.
local function u32_and(a, b)
    local result, place = 0, 1
    -- Once either operand is exhausted, no further bit can be set in the result
    while a > 0 and b > 0 do
        local ones = (a % 2) + (b % 2)
        if ones == 2 then
            result = result + place
        end
        a = math.floor(a / 2)
        b = math.floor(b / 2)
        place = place * 2
    end
    return result
end

-- Bitwise NOT over 32 bits, computed as the distance from the all-ones word.
local function u32_not(a)
    local all_ones = 0xffffffff
    return all_ones - a
end

-- Bitwise OR built from plain arithmetic: both operands are peeled apart one
-- binary digit at a time, starting from the least significant bit.
local function u32_or(a, b)
    local result, place = 0, 1
    -- Keep going while either operand still has bits left to contribute
    while a > 0 or b > 0 do
        local ones = (a % 2) + (b % 2)
        if ones > 0 then
            result = result + place
        end
        a = math.floor(a / 2)
        b = math.floor(b / 2)
        place = place * 2
    end
    return result
end

-- Rotates the 32-bit value `a` left by `x` bit positions.
--
-- `a` is expected to be an integer in [0, 2^32). `x` may be any integer; it is
-- reduced modulo 32 first, so counts of 32 or more wrap around and negative
-- counts behave as right rotations. Without that reduction a count above 32
-- would make the divisor below collapse to zero.
local function u32_rotl(a, x)
    x = x % 32
    -- Dividing by 2^(32 - x) splits `a` into the top x bits (quotient) and the
    -- remaining low bits (remainder); the rotation swaps their positions.
    local xx = math.floor(2 ^ (32 - x))
    return math.floor(a / xx) + ((a % xx) * math.floor(2 ^ x))
end

-- Bitwise XOR built from plain arithmetic: both operands are peeled apart one
-- binary digit at a time, starting from the least significant bit.
local function u32_xor(a, b)
    local result, place = 0, 1
    -- When the remaining high parts are equal, every bit still to come XORs to zero
    while a ~= b do
        local bit_a, bit_b = a % 2, b % 2
        if bit_a ~= bit_b then
            result = result + place
        end
        a = math.floor(a / 2)
        b = math.floor(b / 2)
        place = place * 2
    end
    return result
end

-- Utils: structs

-- Serializes a number as a 4-byte string, least significant byte first.
local function pack_le_u32(u32)
    local chars = {}
    for index = 1, 4 do
        -- Emit the current low byte, then shift the value right by eight bits
        chars[index] = string.char(u32 % 0x100)
        u32 = math.floor(u32 / 0x100)
    end
    return table.concat(chars)
end

-- Serializes a number as an 8-byte string, least significant byte first.
local function pack_le_u64(u64)
    local chars = {}
    for index = 1, 8 do
        -- Emit the current low byte, then shift the value right by eight bits
        chars[index] = string.char(u64 % 0x100)
        u64 = math.floor(u64 / 0x100)
    end
    return table.concat(chars)
end

-- Reads four bytes of `bytes` starting at the 1-based position `offset` and
-- combines them into one number, treating the first byte as least significant.
local function unpack_le_u32(bytes, offset)
    local value = 0
    -- Walk from the most significant byte down so each step is a shift-and-add
    for index = offset + 3, offset, -1 do
        value = (value * 0x100) + string.byte(bytes, index)
    end
    return value
end

-- Utils: representation

-- Renders every byte of `bytes` as two lowercase hexadecimal digits and returns
-- the digits joined into one string (an empty input gives an empty string).
local function bytes_to_hex(bytes)
    -- Collect the pieces and join once instead of re-copying a growing string
    local pieces = {}
    for i = 1, #bytes do
        pieces[i] = string.format('%02x', string.byte(bytes, i))
    end
    return table.concat(pieces)
end

-- Utils: MW helpers

-- Builds the message to hash from the positional arguments in `frame.args`.
--
-- The arguments reachable through `ipairs` (1, 2, ... up to the first missing
-- index) are joined with a single NUL byte between neighbours. Named arguments
-- are ignored. Raises an error when there is no positional argument at all.
local function make_message(frame)
    -- Per Scribunto docs, we can't count the number of anonymous/numbered params with `#frame.args`,
    -- so copy them into a plain table whose length is reliable.
    local parts = {}
    for index, value in ipairs(frame.args) do
        parts[index] = value
    end
    assert(#parts > 0, 'Function must be invoked with at least one anonymous or numbered parameter')
    return table.concat(parts, '\0')
end

-- Utils: hash functions

-- MD5 auxiliary function F: (x AND y) OR ((NOT x) AND z).
-- Each bit of x selects the matching bit of y when set, of z when clear.
local function md5_f(x, y, z)
    local taken_from_y = u32_and(x, y)
    local taken_from_z = u32_and(u32_not(x), z)
    return u32_or(taken_from_y, taken_from_z)
end

-- MD5 auxiliary function G: (x AND z) OR (y AND (NOT z)).
-- Each bit of z selects the matching bit of x when set, of y when clear.
local function md5_g(x, y, z)
    local taken_from_x = u32_and(x, z)
    local taken_from_y = u32_and(y, u32_not(z))
    return u32_or(taken_from_x, taken_from_y)
end

-- MD5 auxiliary function H: x XOR y XOR z.
local function md5_h(x, y, z)
    local x_xor_y = u32_xor(x, y)
    return u32_xor(x_xor_y, z)
end

-- MD5 auxiliary function I: y XOR (x OR (NOT z)).
local function md5_i(x, y, z)
    local x_or_not_z = u32_or(x, u32_not(z))
    return u32_xor(y, x_or_not_z)
end

-- One MD5 step: returns b + ((a + aux(b, c, d) + x_k + t_i) <<< s), with every
-- addition wrapped to 32 bits.
--   a, b, c, d  the four working words, in the order used by this step
--   aux         the round's auxiliary function (md5_f, md5_g, md5_h or md5_i)
--   x_k         the message word consumed by this step
--   s           the left-rotation amount
--   t_i         the additive constant for this step
local function md5_op(a, b, c, d, aux, x_k, s, t_i)
    local state_term = u32_add(a, aux(b, c, d))
    local input_term = u32_add(x_k, t_i)
    local rotated = u32_rotl(u32_add(state_term, input_term), s)
    return u32_add(b, rotated)
end

-- Creates a fresh MD5 context table: the four chaining words at their starting
-- values, an empty carry-over buffer, a zero byte count and a cleared
-- "finalized" flag.
local function md5_init()
    return {
        -- Chaining words, initialized per section 3.3 of RFC 1321
        a = 0x67452301,
        b = 0xefcdab89,
        c = 0x98badcfe,
        d = 0x10325476,
        -- Bookkeeping
        buffer = '',
        message_length = 0,
        is_finalized = false
    }
end

-- Feeds `message` into an MD5 context created by md5_init.
--
-- Bytes left over from earlier calls (context.buffer) are prepended to
-- `message`. Every complete 64-byte block of the result is mixed into
-- context.a..d, and any trailing partial block is stored back in
-- context.buffer for the next call. context.message_length grows by the byte
-- length of `message`.
--
-- Raises an error if the context has already been finalized.
local function md5_update(context, message)
    assert(not context.is_finalized, 'MD5 context has already been finalized')

    local buffer = context.buffer .. message
    -- Keep whatever does not fill a whole 64-byte block for the next call
    local partial_block_length = #buffer % 64
    context.buffer = (partial_block_length > 0) and string.sub(buffer, -partial_block_length) or ''
    context.message_length = context.message_length + #message

    -- Process each block (per section 3.4 of RFC 1321)
    for i = 0, math.floor(#buffer / 64) - 1 do
        -- Decode the block into sixteen little-endian 32-bit words, x[1]..x[16]
        local x = {}; for j = 0, 15 do x[j + 1] = unpack_le_u32(buffer, (i * 64) + (j * 4) + 1) end

        -- Work on copies; the context is only updated once all four rounds are done
        local a, b, c, d = context.a, context.b, context.c, context.d

        -- In every step below the arguments are: the four working words (rotated
        -- one position per step), the round's auxiliary function, the message
        -- word, the rotation amount, and the step constant from luts.md5_t.
        -- NOTE(review): luts.md5_t is loaded from Module:Hash/LUTs and is not
        -- visible here; presumably it holds the 64 T[i] constants of RFC 1321.

        -- Round one
        a = md5_op(a, b, c, d, md5_f, x[ 1],  7, luts.md5_t[ 1])
        d = md5_op(d, a, b, c, md5_f, x[ 2], 12, luts.md5_t[ 2])
        c = md5_op(c, d, a, b, md5_f, x[ 3], 17, luts.md5_t[ 3])
        b = md5_op(b, c, d, a, md5_f, x[ 4], 22, luts.md5_t[ 4])
        a = md5_op(a, b, c, d, md5_f, x[ 5],  7, luts.md5_t[ 5])
        d = md5_op(d, a, b, c, md5_f, x[ 6], 12, luts.md5_t[ 6])
        c = md5_op(c, d, a, b, md5_f, x[ 7], 17, luts.md5_t[ 7])
        b = md5_op(b, c, d, a, md5_f, x[ 8], 22, luts.md5_t[ 8])
        a = md5_op(a, b, c, d, md5_f, x[ 9],  7, luts.md5_t[ 9])
        d = md5_op(d, a, b, c, md5_f, x[10], 12, luts.md5_t[10])
        c = md5_op(c, d, a, b, md5_f, x[11], 17, luts.md5_t[11])
        b = md5_op(b, c, d, a, md5_f, x[12], 22, luts.md5_t[12])
        a = md5_op(a, b, c, d, md5_f, x[13],  7, luts.md5_t[13])
        d = md5_op(d, a, b, c, md5_f, x[14], 12, luts.md5_t[14])
        c = md5_op(c, d, a, b, md5_f, x[15], 17, luts.md5_t[15])
        b = md5_op(b, c, d, a, md5_f, x[16], 22, luts.md5_t[16])

        -- Round two
        a = md5_op(a, b, c, d, md5_g, x[ 2],  5, luts.md5_t[17])
        d = md5_op(d, a, b, c, md5_g, x[ 7],  9, luts.md5_t[18])
        c = md5_op(c, d, a, b, md5_g, x[12], 14, luts.md5_t[19])
        b = md5_op(b, c, d, a, md5_g, x[ 1], 20, luts.md5_t[20])
        a = md5_op(a, b, c, d, md5_g, x[ 6],  5, luts.md5_t[21])
        d = md5_op(d, a, b, c, md5_g, x[11],  9, luts.md5_t[22])
        c = md5_op(c, d, a, b, md5_g, x[16], 14, luts.md5_t[23])
        b = md5_op(b, c, d, a, md5_g, x[ 5], 20, luts.md5_t[24])
        a = md5_op(a, b, c, d, md5_g, x[10],  5, luts.md5_t[25])
        d = md5_op(d, a, b, c, md5_g, x[15],  9, luts.md5_t[26])
        c = md5_op(c, d, a, b, md5_g, x[ 4], 14, luts.md5_t[27])
        b = md5_op(b, c, d, a, md5_g, x[ 9], 20, luts.md5_t[28])
        a = md5_op(a, b, c, d, md5_g, x[14],  5, luts.md5_t[29])
        d = md5_op(d, a, b, c, md5_g, x[ 3],  9, luts.md5_t[30])
        c = md5_op(c, d, a, b, md5_g, x[ 8], 14, luts.md5_t[31])
        b = md5_op(b, c, d, a, md5_g, x[13], 20, luts.md5_t[32])

        -- Round three
        a = md5_op(a, b, c, d, md5_h, x[ 6],  4, luts.md5_t[33])
        d = md5_op(d, a, b, c, md5_h, x[ 9], 11, luts.md5_t[34])
        c = md5_op(c, d, a, b, md5_h, x[12], 16, luts.md5_t[35])
        b = md5_op(b, c, d, a, md5_h, x[15], 23, luts.md5_t[36])
        a = md5_op(a, b, c, d, md5_h, x[ 2],  4, luts.md5_t[37])
        d = md5_op(d, a, b, c, md5_h, x[ 5], 11, luts.md5_t[38])
        c = md5_op(c, d, a, b, md5_h, x[ 8], 16, luts.md5_t[39])
        b = md5_op(b, c, d, a, md5_h, x[11], 23, luts.md5_t[40])
        a = md5_op(a, b, c, d, md5_h, x[14],  4, luts.md5_t[41])
        d = md5_op(d, a, b, c, md5_h, x[ 1], 11, luts.md5_t[42])
        c = md5_op(c, d, a, b, md5_h, x[ 4], 16, luts.md5_t[43])
        b = md5_op(b, c, d, a, md5_h, x[ 7], 23, luts.md5_t[44])
        a = md5_op(a, b, c, d, md5_h, x[10],  4, luts.md5_t[45])
        d = md5_op(d, a, b, c, md5_h, x[13], 11, luts.md5_t[46])
        c = md5_op(c, d, a, b, md5_h, x[16], 16, luts.md5_t[47])
        b = md5_op(b, c, d, a, md5_h, x[ 3], 23, luts.md5_t[48])

        -- Round four
        a = md5_op(a, b, c, d, md5_i, x[ 1],  6, luts.md5_t[49])
        d = md5_op(d, a, b, c, md5_i, x[ 8], 10, luts.md5_t[50])
        c = md5_op(c, d, a, b, md5_i, x[15], 15, luts.md5_t[51])
        b = md5_op(b, c, d, a, md5_i, x[ 6], 21, luts.md5_t[52])
        a = md5_op(a, b, c, d, md5_i, x[13],  6, luts.md5_t[53])
        d = md5_op(d, a, b, c, md5_i, x[ 4], 10, luts.md5_t[54])
        c = md5_op(c, d, a, b, md5_i, x[11], 15, luts.md5_t[55])
        b = md5_op(b, c, d, a, md5_i, x[ 2], 21, luts.md5_t[56])
        a = md5_op(a, b, c, d, md5_i, x[ 9],  6, luts.md5_t[57])
        d = md5_op(d, a, b, c, md5_i, x[16], 10, luts.md5_t[58])
        c = md5_op(c, d, a, b, md5_i, x[ 7], 15, luts.md5_t[59])
        b = md5_op(b, c, d, a, md5_i, x[14], 21, luts.md5_t[60])
        a = md5_op(a, b, c, d, md5_i, x[ 5],  6, luts.md5_t[61])
        d = md5_op(d, a, b, c, md5_i, x[12], 10, luts.md5_t[62])
        c = md5_op(c, d, a, b, md5_i, x[ 3], 15, luts.md5_t[63])
        b = md5_op(b, c, d, a, md5_i, x[10], 21, luts.md5_t[64])

        -- Fold this block's result into the running state (32-bit wrapping adds)
        context.a = u32_add(context.a, a)
        context.b = u32_add(context.b, b)
        context.c = u32_add(context.c, c)
        context.d = u32_add(context.d, d)
    end
end

-- Completes an MD5 computation and returns the 16-byte digest as a raw string.
--
-- Appends the padding and the message length, runs the remaining block(s)
-- through md5_update, then marks the context as finalized and drops its
-- buffer. Raises an error if the context was already finalized.
local function md5_finalize(context)
    assert(not context.is_finalized, 'MD5 context has already been finalized')

    -- Pad (per sections 3.1 and 3.2 of RFC 1321): a 0x80 byte, enough zero bytes
    -- to leave exactly 8 bytes free in the last block, then the bit length
    local byte_count = context.message_length
    local zero_count = ((56 - ((byte_count + 1) % 64)) + 64) % 64
    local length_field = pack_le_u64(byte_count * 8)
    local padding = '\128' .. string.rep('\0', zero_count) .. length_field

    -- Sanity check: the padded tail must fill exactly one or two whole blocks
    local expected_total = (#context.buffer < 56) and 64 or 128
    assert(#context.buffer + #padding == expected_total)

    md5_update(context, padding)

    -- Concatenate (per section 3.5 of RFC 1321): a, b, c, d, each little-endian
    local digest = ''
    for _, word in ipairs({ context.a, context.b, context.c, context.d }) do
        digest = digest .. pack_le_u32(word)
    end

    -- Clean up
    context.buffer = nil
    context.is_finalized = true

    return digest
end

-- Creates an incremental MD5 hasher, optionally seeded with `message`.
--
-- Returns a table with three functions (called with `.`, not `:`):
--   update(chunk)  feeds more data and returns the same table for chaining;
--                  raises an error once the digest has been computed
--   digest()       returns the raw 16-byte digest, computing it on first use
--   hexdigest()    returns the digest as a hexadecimal string
local function MD5(message)
    local context = md5_init()
    if message then
        md5_update(context, message)
    end

    local iface = {}
    local cached_digest, cached_hexdigest

    function iface.update(chunk)
        assert(cached_digest == nil, 'MD5 context has already been finalized')
        md5_update(context, chunk)
        return iface  -- Facilitate fluent interface
    end

    function iface.digest()
        if cached_digest ~= nil then
            return cached_digest
        end
        cached_digest = md5_finalize(context)
        context = nil  -- The finalized context is no longer needed
        return cached_digest
    end

    function iface.hexdigest()
        if cached_hexdigest ~= nil then
            return cached_hexdigest
        end
        cached_hexdigest = bytes_to_hex(iface.digest())
        return cached_hexdigest
    end

    return iface
end

-- Exports

-- Table collecting everything this module makes available to its users
local p = {}

-- Exports: for use in other libraries

-- Constructor for the incremental hasher (update / digest / hexdigest)
p.MD5 = MD5

-- Exports: invocables

-- Invocable entry point: returns the MD5 hex digest of the frame's positional
-- arguments, joined as done by make_message. Uses mw.hash.hashValue when the
-- mw.hash library is present and falls back to the pure-Lua implementation
-- otherwise.
function p.md5_hexdigest(frame)
    local message = make_message(frame)
    if mw.hash then
        local native = mw.hash.hashValue('md5', message)
        if native then
            return native
        end
    end
    local fallback = MD5(message).hexdigest()
    return fallback
end

-- The export table is the module's return value
return p