#!/usr/bin/env python3
# create the packet-dmx-manfid.c file from
# https://tsp.esta.org/tsp/working_groups/CP/mfctrIDs.php
#
# Copyright 2025 by Matt Morris <mattm.dev.1[AT]gmail.com>
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later

import urllib.request
import re
import datetime

# Page holding the ESTA manufacturer ID table, and the C source generated from it.
BASE_URL = "https://tsp.esta.org/tsp/working_groups/CP/mfctrIDs.php"
OUTPUT_FILE = "epan/dissectors/packet-dmx-manfid.c"

# Lower bound on the number of parsed rows; a download yielding fewer is rejected.
# (1685 on 2025-12-01)
MIN_COUNT = 1685

# One table row: four consecutive <td> cells, each on its own line.
# Capture groups: (1) four-hex-digit ID, (2) 15%-wide cell, (3) 55%-wide cell
# (used as the manufacturer name), (4) second 10%-wide cell.
REGEX_DATA = (
    r'<td width="10%">([0-9a-fA-F]{4})h&nbsp;<\/td>'
    r'\n *<td width="15%">(.*?)&nbsp;<\/td>'
    r'\n *<td width="55%">(.*?)&nbsp;<\/td>'
    r'\n *<td width="10%">(.*?)&nbsp;<\/td>'
)
# Matches every 55%-wide cell; used to count table rows independently of REGEX_DATA.
REGEX_ROW = r'<td width="55%">'

# clean-up the input data a little bit (change some wonky symbols and HTML escapes)
# Each (old, new) pair is applied in list order to the whole downloaded page.
# The non-breaking space is spelled as an escape because the literal character
# is indistinguishable from an ASCII space in most editors.
REPLACEMENTS = [
    ("  ",     " "),   # collapse a double space into a single space
    ("\u00a0", ""),    # U+00A0 Non-Breaking Space
    ("）",     ") "),  # ） U+FF09 Fullwidth Right Parenthesis
    ("&#228",  "ä"),   # ä U+00E4 Latin Small Letter A with Diaeresis
]
# Known IDs that occur on more than one consecutive table row.
# The value is the merged name written for that ID in place of the row's own name.
SPECIALS = {
    "0000": "ESTA / PLASA",
    "0854": "Sharp / NEC Display Solutions, Ltd.",
    "4C5A": "Sumolight GmbH / LightMinded Industries, Inc.",
}

# Additional (code, name) entries merged into the generated table.
# Keep in ascending code order: main() only ever compares against the first element.
EXTRAS = [
    ("FFFC", "RDMNet RPT All Controllers"),
    ("FFFD", "RDMNet RPT All Devices"),
]

# Date stamped into the generated file; evaluated once, when this module is loaded.
_DOWNLOAD_DATE = datetime.date.today().strftime("%Y-%m-%d")

# Text written at the top of OUTPUT_FILE, ahead of the value_string table.
HEADER = f"""/*
 * This file was generated by running ./tools/make-dmx-manfid.py
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * The ESTA Manufacturer ID Database is available at:
 * <{BASE_URL}>
 *
 */

#include "packet-dmx-manfid.h"

/*
 * ESTA codes download date: {_DOWNLOAD_DATE}
 */
"""


def _value_string_entry(code, name):
    """Format one C ``value_string`` initializer line for *code* and *name*.

    Backslashes and double quotes in *name* are escaped so that an unusual
    manufacturer name cannot terminate the C string literal early.
    """
    escaped = name.replace("\\", "\\\\").replace('"', '\\"')
    return f'  {{ 0x{code}, "{escaped}" }},\n'


def main():
    """Download the ESTA manufacturer ID list and regenerate OUTPUT_FILE.

    Fetches BASE_URL, applies REPLACEMENTS to the page text, extracts the
    table rows with REGEX_DATA and writes them as a C value_string table.
    Consecutive rows sharing an ID are collapsed into a single entry (named
    from SPECIALS when available) and the EXTRAS entries are merged in by code.

    Raises:
        SystemExit: with an error message (exit status 1) if the number of
            parsed rows does not match the number of table rows found, or is
            lower than MIN_COUNT.
    """
    req_headers = {'User-Agent': 'Wireshark make-dmx-manfid'}
    req = urllib.request.Request(BASE_URL, headers=req_headers)
    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(req) as resp:
        dat = resp.read().decode('UTF-8', 'replace')

    for old, new in REPLACEMENTS:
        dat = dat.replace(old, new)

    row_count = len(re.findall(REGEX_ROW, dat))
    rows = re.findall(REGEX_DATA, dat)

    # note that it should be one less because of the header row
    if len(rows) != row_count - 1:
        raise SystemExit("ERROR: number of detected rows doesn't match number of row data elements")

    if len(rows) < MIN_COUNT:
        raise SystemExit("ERROR: number of manufacturers is less than the stored previous count")

    # Build every entry before touching the output file, so a failure while
    # processing rows cannot leave a truncated file behind.
    # Work on a copy so the module-level EXTRAS list is not consumed.
    pending_extras = list(EXTRAS)
    entries = []
    prev_hex = None
    for row in rows:
        hex_code = row[0].upper()
        manf_name = row[2].strip(" ")

        if prev_hex == hex_code:
            # Repeated ID: the first row already produced the entry.
            if hex_code not in SPECIALS:
                print(f"WARNING: Duplicate ManfID 0x{hex_code} doesn't have a merged form")
            continue
        manf_name = SPECIALS.get(hex_code, manf_name)

        # Emit any extra entries that sort before the current ID.
        # Codes are fixed-width upper-case hex, so string order is numeric order.
        while pending_extras and hex_code > pending_extras[0][0]:
            entries.append(_value_string_entry(*pending_extras.pop(0)))

        entries.append(_value_string_entry(hex_code, manf_name))
        prev_hex = hex_code

    # Extras that sort after the last downloaded ID would otherwise be lost.
    entries.extend(_value_string_entry(code, name) for code, name in pending_extras)

    with open(OUTPUT_FILE, "w", encoding="UTF-8") as h:
        h.write(HEADER)
        h.write("static const value_string dmx_esta_manfid_vals[] = {\n")
        h.writelines(entries)
        h.write("  { 0,      NULL }\n")
        h.write("};\n")
        h.write("value_string_ext dmx_esta_manfid_vals_ext = VALUE_STRING_EXT_INIT(dmx_esta_manfid_vals);\n")


if __name__ == "__main__":
    main()
