[PATCH 4/4] soc: bcm: brcmstb: biuctrl: Change RAC data line prefetching after 4 consecutive lines

From: Florian Fainelli
Date: Fri Sep 04 2020 - 16:51:36 EST


Change the RACPREFDATA(x) setting to prefetch the next 256-byte line
after 4 consecutive lines have been used, instead of after 2 consecutive
lines. This improves the synthetic memcpy benchmark by an additional
0.5% on top of the previous change for Cortex-A72 CPUs.

Signed-off-by: Florian Fainelli <f.fainelli@xxxxxxxxx>
---
drivers/soc/bcm/brcmstb/biuctrl.c | 12 +++++++++---
1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/soc/bcm/brcmstb/biuctrl.c b/drivers/soc/bcm/brcmstb/biuctrl.c
index 28f69cc0df51..63864b6dea2e 100644
--- a/drivers/soc/bcm/brcmstb/biuctrl.c
+++ b/drivers/soc/bcm/brcmstb/biuctrl.c
@@ -23,7 +23,9 @@
#define DPREF_LINE_2_SHIFT 24
#define DPREF_LINE_2_MASK 0xff

-/* Bitmask to enable instruction and data prefetching with a 256-bytes stride */
+/* Bitmask to enable instruction and data prefetching with a 256-byte stride;
+ * the next 256-byte line is prefetched after 4 consecutive lines are used
+ */
#define RAC_DATA_INST_EN_MASK (1 << RACPREFINST_SHIFT | \
RACENPREF_MASK << RACENINST_SHIFT | \
1 << RACPREFDATA_SHIFT | \
@@ -174,7 +176,7 @@ static const u32 a72_b53_mach_compat[] = {
static void __init a72_b53_rac_enable_all(struct device_node *np)
{
unsigned int cpu;
- u32 enable = 0, pref_dist;
+ u32 enable = 0, pref_dist, shift;

if (IS_ENABLED(CONFIG_CACHE_B15_RAC))
return;
@@ -184,9 +186,13 @@ static void __init a72_b53_rac_enable_all(struct device_node *np)

pref_dist = cbc_readl(RAC_CONFIG1_REG);
for_each_possible_cpu(cpu) {
+ shift = cpu * RAC_CPU_SHIFT + RACPREFDATA_SHIFT;
enable |= RAC_DATA_INST_EN_MASK << (cpu * RAC_CPU_SHIFT);
- if (cpubiuctrl_regs == a72_cpubiuctrl_regs)
+ if (cpubiuctrl_regs == a72_cpubiuctrl_regs) {
+ enable &= ~(RACENPREF_MASK << shift);
+ enable |= 3 << shift;
pref_dist |= 1 << (cpu + DPREF_LINE_2_SHIFT);
+ }
}

cbc_writel(enable, RAC_CONFIG0_REG);
--
2.25.1