/* SPDX-License-Identifier: GPL-2.0 */ /* * AMD Address Translation Library * * internal.h : Helper functions and common defines * * Copyright (c) 2023, Advanced Micro Devices, Inc. * All Rights Reserved. * * Author: Yazen Ghannam */ #ifndef __AMD_ATL_INTERNAL_H__ #define __AMD_ATL_INTERNAL_H__ #include #include #include #include #include "reg_fields.h" #undef pr_fmt #define pr_fmt(fmt) "amd_atl: " fmt /* Maximum possible number of Coherent Stations within a single Data Fabric. */ #define MAX_COH_ST_CHANNELS 32 /* PCI ID for Zen4 Server DF Function 0. */ #define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 /* PCI IDs for MI300 DF Function 0. */ #define DF_FUNC0_ID_MI300 0x15281022 /* Shift needed for adjusting register values to true values. */ #define DF_DRAM_BASE_LIMIT_LSB 28 #define MI300_DRAM_LIMIT_LSB 20 #define INVALID_SPA ~0ULL enum df_revisions { UNKNOWN, DF2, DF3, DF3p5, DF4, DF4p5, }; /* These are mapped 1:1 to the hardware values. Special cases are set at > 0x20. */ enum intlv_modes { NONE = 0x00, NOHASH_2CHAN = 0x01, NOHASH_4CHAN = 0x03, NOHASH_8CHAN = 0x05, DF3_6CHAN = 0x06, NOHASH_16CHAN = 0x07, NOHASH_32CHAN = 0x08, DF3_COD4_2CHAN_HASH = 0x0C, DF3_COD2_4CHAN_HASH = 0x0D, DF3_COD1_8CHAN_HASH = 0x0E, DF4_NPS4_2CHAN_HASH = 0x10, DF4_NPS2_4CHAN_HASH = 0x11, DF4_NPS1_8CHAN_HASH = 0x12, DF4_NPS4_3CHAN_HASH = 0x13, DF4_NPS2_6CHAN_HASH = 0x14, DF4_NPS1_12CHAN_HASH = 0x15, DF4_NPS2_5CHAN_HASH = 0x16, DF4_NPS1_10CHAN_HASH = 0x17, MI3_HASH_8CHAN = 0x18, MI3_HASH_16CHAN = 0x19, MI3_HASH_32CHAN = 0x1A, DF2_2CHAN_HASH = 0x21, /* DF4.5 modes are all IntLvNumChan + 0x20 */ DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, DF4p5_NPS0_24CHAN_1K_HASH = 0x2E, DF4p5_NPS4_2CHAN_1K_HASH = 0x30, DF4p5_NPS2_4CHAN_1K_HASH = 0x31, DF4p5_NPS1_8CHAN_1K_HASH = 0x32, DF4p5_NPS4_3CHAN_1K_HASH = 0x33, DF4p5_NPS2_6CHAN_1K_HASH = 0x34, DF4p5_NPS1_12CHAN_1K_HASH = 0x35, DF4p5_NPS2_5CHAN_1K_HASH = 0x36, DF4p5_NPS1_10CHAN_1K_HASH = 0x37, DF4p5_NPS4_2CHAN_2K_HASH = 0x40, DF4p5_NPS2_4CHAN_2K_HASH = 0x41, DF4p5_NPS1_8CHAN_2K_HASH = 0x42, DF4p5_NPS1_16CHAN_2K_HASH = 0x43, DF4p5_NPS4_3CHAN_2K_HASH = 0x44, DF4p5_NPS2_6CHAN_2K_HASH = 0x45, DF4p5_NPS1_12CHAN_2K_HASH = 0x46, DF4p5_NPS0_24CHAN_2K_HASH = 0x47, DF4p5_NPS2_5CHAN_2K_HASH = 0x48, DF4p5_NPS1_10CHAN_2K_HASH = 0x49, }; struct df4p5_denorm_ctx { /* Indicates the number of "lost" bits. This will be 1, 2, or 3. */ u8 perm_shift; /* A mask indicating the bits that need to be rehashed. */ u16 rehash_vector; /* * Represents the value that the high bits of the normalized address * are divided by during normalization. This value will be 3 for * interleave modes with a number of channels divisible by 3 or the * value will be 5 for interleave modes with a number of channels * divisible by 5. Power-of-two interleave modes are handled * separately. */ u8 mod_value; /* * Represents the bits that can be directly pulled from the normalized * address. In each case, pass through bits [7:0] of the normalized * address. The other bits depend on the interleave bit position which * will be bit 10 for 1K interleave stripe cases and bit 11 for 2K * interleave stripe cases. */ u64 base_denorm_addr; /* * Represents the high bits of the physical address that have been * divided by the mod_value. */ u64 div_addr; u64 current_spa; u64 resolved_spa; u16 coh_st_fabric_id; }; struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, heterogeneous : 1, __reserved_0 : 5; }; struct df_config { enum df_revisions rev; /* * These masks operate on the 16-bit Coherent Station IDs, * e.g. Instance, Fabric, Destination, etc. */ u16 component_id_mask; u16 die_id_mask; u16 node_id_mask; u16 socket_id_mask; /* * Least-significant bit of Node ID portion of the * system-wide Coherent Station Fabric ID. */ u8 node_id_shift; /* * Least-significant bit of Die portion of the Node ID. * Adjusted to include the Node ID shift in order to apply * to the Coherent Station Fabric ID. */ u8 die_id_shift; /* * Least-significant bit of Socket portion of the Node ID. * Adjusted to include the Node ID shift in order to apply * to the Coherent Station Fabric ID. */ u8 socket_id_shift; /* Number of DRAM Address maps visible in a Coherent Station. */ u8 num_coh_st_maps; u32 dram_hole_base; /* Global flags to handle special cases. */ struct df_flags flags; }; extern struct df_config df_cfg; struct dram_addr_map { /* * Each DRAM Address Map can operate independently * in different interleaving modes. */ enum intlv_modes intlv_mode; /* System-wide number for this address map. */ u8 num; /* Raw register values */ u32 base; u32 limit; u32 ctl; u32 intlv; /* * Logical to Physical Coherent Station Remapping array * * Index: Logical Coherent Station Instance ID * Value: Physical Coherent Station Instance ID * * phys_coh_st_inst_id = remap_array[log_coh_st_inst_id] */ u8 remap_array[MAX_COH_ST_CHANNELS]; /* * Number of bits covering DRAM Address map 0 * when interleaving is non-power-of-2. * * Used only for DF3_6CHAN. */ u8 np2_bits; /* Position of the 'interleave bit'. */ u8 intlv_bit_pos; /* Number of channels interleaved in this map. */ u8 num_intlv_chan; /* Number of dies interleaved in this map. */ u8 num_intlv_dies; /* Number of sockets interleaved in this map. */ u8 num_intlv_sockets; /* * Total number of channels interleaved accounting * for die and socket interleaving. */ u8 total_intlv_chan; /* Total bits needed to cover 'total_intlv_chan'. */ u8 total_intlv_bits; }; /* Original input values cached for debug printing. */ struct addr_ctx_inputs { u64 norm_addr; u8 socket_id; u8 die_id; u8 coh_st_inst_id; }; struct addr_ctx { u64 ret_addr; struct addr_ctx_inputs inputs; struct dram_addr_map map; /* AMD Node ID calculated from Socket and Die IDs. */ u8 node_id; /* * Coherent Station Instance ID * Local ID used within a 'node'. */ u16 inst_id; /* * Coherent Station Fabric ID * System-wide ID that includes 'node' bits. */ u16 coh_st_fabric_id; }; int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo); int get_df_system_info(void); int determine_node_id(struct addr_ctx *ctx, u8 socket_num, u8 die_num); int get_umc_info_mi300(void); int get_address_map(struct addr_ctx *ctx); int denormalize_address(struct addr_ctx *ctx); int dehash_address(struct addr_ctx *ctx); unsigned long norm_to_sys_addr(u8 socket_id, u8 die_id, u8 coh_st_inst_id, unsigned long addr); unsigned long convert_umc_mca_addr_to_sys_addr(struct atl_err *err); u64 add_base_and_hole(struct addr_ctx *ctx, u64 addr); u64 remove_base_and_hole(struct addr_ctx *ctx, u64 addr); #ifdef CONFIG_AMD_ATL_PRM unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr); #else static inline unsigned long prm_umc_norm_to_sys_addr(u8 socket_id, u64 umc_bank_inst_id, unsigned long addr) { return -ENODEV; } #endif /* * Make a gap in @data that is @num_bits long starting at @bit_num. * e.g. data = 11111111'b * bit_num = 3 * num_bits = 2 * result = 1111100111'b */ static inline u64 expand_bits(u8 bit_num, u8 num_bits, u64 data) { u64 temp1, temp2; if (!num_bits) return data; if (!bit_num) { WARN_ON_ONCE(num_bits >= BITS_PER_LONG); return data << num_bits; } WARN_ON_ONCE(bit_num >= BITS_PER_LONG); temp1 = data & GENMASK_ULL(bit_num - 1, 0); temp2 = data & GENMASK_ULL(63, bit_num); temp2 <<= num_bits; return temp1 | temp2; } /* * Remove bits in @data between @low_bit and @high_bit inclusive. * e.g. data = XXXYYZZZ'b * low_bit = 3 * high_bit = 4 * result = XXXZZZ'b */ static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data) { u64 temp1, temp2; WARN_ON_ONCE(high_bit >= BITS_PER_LONG); WARN_ON_ONCE(low_bit >= BITS_PER_LONG); WARN_ON_ONCE(low_bit > high_bit); if (!low_bit) return data >> (high_bit++); temp1 = GENMASK_ULL(low_bit - 1, 0) & data; temp2 = GENMASK_ULL(63, high_bit + 1) & data; temp2 >>= high_bit - low_bit + 1; return temp1 | temp2; } #define atl_debug(ctx, fmt, arg...) \ pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\ (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\ (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg) static inline void atl_debug_on_bad_df_rev(void) { pr_debug("Unrecognized DF rev: %u", df_cfg.rev); } static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) { atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); } #endif /* __AMD_ATL_INTERNAL_H__ */