drm fixes for 6.15-rc3

dma-buf:
 - Correctly decrement refcounter on errors
 
 gem:
 - Fix test for imported buffers
 
 amdgpu:
 - Cleaner shader sysfs fix
 - Suspend fix
 - Fix doorbell free ordering
 - Video caps fix
 - DML2 memory allocation optimization
 - HDP fix
 
 i915:
 - Fix DP DSC configurations that require 3 DSC engines per pipe
 
 xe:
 - Fix LRC address being written too late for GuC
 - Fix notifier vs folio deadlock
 - Fix race between dma_buf unmap and vram eviction
 - Fix debugfs handling PXP terminations unconditionally
 
 msm:
 - Display:
   - Fix to call dpu_plane_atomic_check_pipe() for both SSPPs in
     case of multi-rect
   - Fix to validate plane_state pointer before using it in
     dpu_plane_virtual_atomic_check()
   - Fix to make sure dereferencing dpu_encoder_phys happens after
     making sure it is valid in _dpu_encoder_trigger_start()
   - Remove the remaining intr_tear_rd_ptr entries that were
     initialized to -1, now that NO_IRQ indices start from 0
 - GPU:
   - Fix IB_SIZE overflow
 
 ivpu:
 - Debug logging fixes (add cmdq_id to job logs)
 - NPU frequency reporting and calculation fixes
 - Support firmware API 3.28.3
 - Flush jobs upon reset
 
 mgag200:
 - Set vblank start to correct values
 
 v3d:
 - Fix Indirect Dispatch
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEEKbZHaGwW9KfbeusDHTzWXnEhr4FAmgDMZIACgkQDHTzWXnE
 hr5X9Q//XIrnshh9g1eaUoBiY1gmvDFX79rAnwP5hSYQb+uA+u7ym8fIH8Dv1uIk
 2SsRCx6BYaaVswmn5JeUWosINbHoXDBQEbJGztyIq2X53I95Xz0TEMnUwD9+rYfC
 oN3Rc59v6ffb5SChKmhp1xeAPzAnl8GC2EdVB6SIOhh4y7H2pX45W2DGR85K5rLX
 X9EwoxN7cKYyf5CHSYs3G44AS/9ho96FqNTbuz9MMUeQEBn7110JJiID1TDzd3yc
 5/RU+qk//fNGMrHpSIevzlOe97iXAUoscp85MdMTduuclok3x/C2Tga3pgeeGBDG
 jXktzZZElzSXGfQbKvil0kMbW6aLfMveyFCPf5t4QRvGo68iW/h3Sk+Qe43TBLCb
 wjXOekYEESfrse/xhyoymJU8FxJGMA+2p/z3dwbsLPMIViYp0/khEGB84AjFWEHI
 hwXmiIgl4N7iQ9Fu8jiLQnlTeFatC/ykvHZGE2BDl2nTUYAa5sjXLQf70+/mB3XH
 2an4UhJGkCN6wzL2fyUE2EWZF27GppTnt5+fwjU9jlSxRVxFeCQU7Eh7QauG0PcC
 z0XumJOmMdZKl38YW7/AdevhQNhGSnXYbxwEvNmRc+TWIfjk9dFb82SGQMiOq0FK
 EeF5iFsiQWSirinaNmhG6ohrtV+TKZd1x6deChA6x+Z1X4RbAKA=
 =vfk0
 -----END PGP SIGNATURE-----

Merge tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Easter rc3 pull request, fixes in all the usuals, amdgpu, xe, msm,
  with some i915/ivpu/mgag200/v3d fixes, then a couple of bits in
  dma-buf/gem.

  Hopefully has no easter eggs in it.

  dma-buf:
   - Correctly decrement refcounter on errors

  gem:
   - Fix test for imported buffers

  amdgpu:
   - Cleaner shader sysfs fix
   - Suspend fix
   - Fix doorbell free ordering
   - Video caps fix
   - DML2 memory allocation optimization
   - HDP fix

  i915:
   - Fix DP DSC configurations that require 3 DSC engines per pipe

  xe:
   - Fix LRC address being written too late for GuC
   - Fix notifier vs folio deadlock
   - Fix race between dma_buf unmap and vram eviction
   - Fix debugfs handling PXP terminations unconditionally

  msm:
   - Display:
       - Fix to call dpu_plane_atomic_check_pipe() for both SSPPs in
         case of multi-rect
       - Fix to validate plane_state pointer before using it in
         dpu_plane_virtual_atomic_check()
       - Fix to make sure dereferencing dpu_encoder_phys happens after
         making sure it is valid in _dpu_encoder_trigger_start()
       - Remove the remaining intr_tear_rd_ptr entries that were
         initialized to -1, now that NO_IRQ indices start from 0
   - GPU:
       - Fix IB_SIZE overflow

  ivpu:
   - Debug logging fixes (add cmdq_id to job logs)
   - NPU frequency reporting and calculation fixes
   - Support firmware API 3.28.3
   - Flush jobs upon reset

  mgag200:
   - Set vblank start to correct values

  v3d:
   - Fix Indirect Dispatch"

* tag 'drm-fixes-2025-04-19' of https://gitlab.freedesktop.org/drm/kernel: (26 commits)
  drm/msm/a6xx+: Don't let IB_SIZE overflow
  drm/xe/pxp: do not queue unneeded terminations from debugfs
  drm/xe/dma_buf: stop relying on placement in unmap
  drm/xe/userptr: fix notifier vs folio deadlock
  drm/xe: Set LRC addresses before guc load
  drm/mgag200: Fix value in <VBLKSTR> register
  drm/gem: Internally test import_attach for imported objects
  drm/amdgpu: Use the right function for hdp flush
  drm/amd/display/dml2: use vzalloc rather than kzalloc
  drm/amdgpu: Add back JPEG to video caps for carrizo and newer
  drm/amdgpu: fix warning of drm_mm_clean
  drm/amd: Forbid suspending into non-default suspend states
  drm/amdgpu: use a dummy owner for sysfs triggered cleaner shaders v4
  drm/i915/dp: Check for HAS_DSC_3ENGINES while configuring DSC slices
  drm/i915/display: Add macro for checking 3 DSC engines
  dma-buf/sw_sync: Decrement refcount on error in sw_sync_ioctl_get_deadline()
  accel/ivpu: Add cmdq_id to job related logs
  accel/ivpu: Show NPU frequency in sysfs
  accel/ivpu: Fix the NPU's DPU frequency calculation
  accel/ivpu: Update FW Boot API to version 3.28.3
  ...
Linus Torvalds 2025-04-19 09:31:21 -07:00
commit 51c7960b87
47 changed files with 407 additions and 285 deletions

@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@ -164,7 +164,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
args->value = vdev->platform;
break;
case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
args->value = ivpu_hw_dpu_max_freq_get(vdev);
break;
case DRM_IVPU_PARAM_NUM_CONTEXTS:
args->value = ivpu_get_context_count(vdev);
@ -421,9 +421,9 @@ void ivpu_prepare_for_reset(struct ivpu_device *vdev)
{
ivpu_hw_irq_disable(vdev);
disable_irq(vdev->irq);
cancel_work_sync(&vdev->irq_ipc_work);
cancel_work_sync(&vdev->irq_dct_work);
cancel_work_sync(&vdev->context_abort_work);
flush_work(&vdev->irq_ipc_work);
flush_work(&vdev->irq_dct_work);
flush_work(&vdev->context_abort_work);
ivpu_ipc_disable(vdev);
ivpu_mmu_disable(vdev);
}

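The change from cancel_work_sync() to flush_work() above is the "flush jobs upon reset" fix from the summary. The difference, in miniature (illustrative, not driver code):

	cancel_work_sync(&w);	/* a queued-but-not-started item is discarded unrun */
	flush_work(&w);		/* a queued item runs to completion before this returns */

Both calls guarantee the handler is no longer executing when they return; only flush_work() guarantees pending IPC, DCT and context-abort work is actually processed before the reset continues.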
@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/firmware.h>
@ -233,10 +233,20 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->dvfs_mode = 0;
fw->sched_mode = ivpu_fw_sched_mode_select(vdev, fw_hdr);
fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
ivpu_info(vdev, "Scheduler mode: %s\n", fw->sched_mode ? "HW" : "OS");
if (fw_hdr->preemption_buffer_1_max_size)
fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_max_size;
else
fw->primary_preempt_buf_size = fw_hdr->preemption_buffer_1_size;
if (fw_hdr->preemption_buffer_2_max_size)
fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_max_size;
else
fw->secondary_preempt_buf_size = fw_hdr->preemption_buffer_2_size;
ivpu_dbg(vdev, FW_BOOT, "Preemption buffer sizes: primary %u, secondary %u\n",
fw->primary_preempt_buf_size, fw->secondary_preempt_buf_size);
if (fw_hdr->ro_section_start_address && !is_within_range(fw_hdr->ro_section_start_address,
fw_hdr->ro_section_size,
fw_hdr->image_load_address,
@ -566,7 +576,6 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->magic = VPU_BOOT_PARAMS_MAGIC;
boot_params->vpu_id = to_pci_dev(vdev->drm.dev)->bus->number;
boot_params->frequency = ivpu_hw_pll_freq_get(vdev);
/*
* This param is a debug firmware feature. It switches default clock

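The preemption-buffer hunk above boils down to a "prefer the firmware-declared maximum" rule; as a sketch (the helper is hypothetical, the driver open-codes the same logic for both buffers):

	static u32 preempt_buf_size(u32 size, u32 max_size)
	{
		/* max_size == 0 means the FW declares no limit (see the
		 * vpu_boot_api.h hunk below), so fall back to the
		 * per-submission size. */
		return max_size ? max_size : size;
	}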
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_H__
@ -82,9 +82,14 @@ static inline u64 ivpu_hw_range_size(const struct ivpu_addr_range *range)
return range->end - range->start;
}
static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
static inline u32 ivpu_hw_dpu_max_freq_get(struct ivpu_device *vdev)
{
return ivpu_hw_btrs_ratio_to_freq(vdev, ratio);
return ivpu_hw_btrs_dpu_max_freq_get(vdev);
}
static inline u32 ivpu_hw_dpu_freq_get(struct ivpu_device *vdev)
{
return ivpu_hw_btrs_dpu_freq_get(vdev);
}
static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
@ -92,11 +97,6 @@ static inline void ivpu_hw_irq_clear(struct ivpu_device *vdev)
ivpu_hw_ip_irq_clear(vdev);
}
static inline u32 ivpu_hw_pll_freq_get(struct ivpu_device *vdev)
{
return ivpu_hw_btrs_pll_freq_get(vdev);
}
static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
{
return vdev->hw->pll.profiling_freq;

@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#include <linux/units.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
#include "ivpu_hw_btrs.h"
@ -28,17 +30,13 @@
#define BTRS_LNL_ALL_IRQ_MASK ((u32)-1)
#define BTRS_MTL_WP_CONFIG_1_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_5_3)
#define BTRS_MTL_WP_CONFIG_1_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_1_TILE, MTL_PLL_RATIO_4_3)
#define BTRS_MTL_WP_CONFIG_2_TILE_5_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_5_3)
#define BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3)
#define BTRS_MTL_WP_CONFIG_0_TILE_PLL_OFF WP_CONFIG(0, 0)
#define PLL_CDYN_DEFAULT 0x80
#define PLL_EPP_DEFAULT 0x80
#define PLL_CONFIG_DEFAULT 0x0
#define PLL_SIMULATION_FREQ 10000000
#define PLL_REF_CLK_FREQ 50000000
#define PLL_REF_CLK_FREQ 50000000ull
#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define TIMEOUT_US (150 * USEC_PER_MSEC)
@ -62,6 +60,8 @@
#define DCT_ENABLE 0x1
#define DCT_DISABLE 0x0
static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio);
int ivpu_hw_btrs_irqs_clear_with_0_mtl(struct ivpu_device *vdev)
{
REGB_WR32(VPU_HW_BTRS_MTL_INTERRUPT_STAT, BTRS_MTL_ALL_IRQ_MASK);
@ -156,7 +156,7 @@ static int info_init_mtl(struct ivpu_device *vdev)
hw->tile_fuse = BTRS_MTL_TILE_FUSE_ENABLE_BOTH;
hw->sku = BTRS_MTL_TILE_SKU_BOTH;
hw->config = BTRS_MTL_WP_CONFIG_2_TILE_4_3_RATIO;
hw->config = WP_CONFIG(MTL_CONFIG_2_TILE, MTL_PLL_RATIO_4_3);
return 0;
}
@ -334,8 +334,8 @@ int ivpu_hw_btrs_wp_drive(struct ivpu_device *vdev, bool enable)
prepare_wp_request(vdev, &wp, enable);
ivpu_dbg(vdev, PM, "PLL workpoint request: %u Hz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
PLL_RATIO_TO_FREQ(wp.target), wp.cfg, wp.epp, wp.cdyn);
ivpu_dbg(vdev, PM, "PLL workpoint request: %lu MHz, config: 0x%x, epp: 0x%x, cdyn: 0x%x\n",
pll_ratio_to_dpu_freq(vdev, wp.target) / HZ_PER_MHZ, wp.cfg, wp.epp, wp.cdyn);
ret = wp_request_send(vdev, &wp);
if (ret) {
@ -573,6 +573,47 @@ int ivpu_hw_btrs_wait_for_idle(struct ivpu_device *vdev)
return REGB_POLL_FLD(VPU_HW_BTRS_LNL_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
}
static u32 pll_config_get_mtl(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
}
static u32 pll_config_get_lnl(struct ivpu_device *vdev)
{
return REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
}
static u32 pll_ratio_to_dpu_freq_mtl(u16 ratio)
{
return (PLL_RATIO_TO_FREQ(ratio) * 2) / 3;
}
static u32 pll_ratio_to_dpu_freq_lnl(u16 ratio)
{
return PLL_RATIO_TO_FREQ(ratio) / 2;
}
static u32 pll_ratio_to_dpu_freq(struct ivpu_device *vdev, u32 ratio)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
return pll_ratio_to_dpu_freq_mtl(ratio);
else
return pll_ratio_to_dpu_freq_lnl(ratio);
}
u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev)
{
return pll_ratio_to_dpu_freq(vdev, vdev->hw->pll.max_ratio);
}
u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
return pll_ratio_to_dpu_freq_mtl(pll_config_get_mtl(vdev));
else
return pll_ratio_to_dpu_freq_lnl(pll_config_get_lnl(vdev));
}
/* Handler for IRQs from Buttress core (irqB) */
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
{
@ -582,9 +623,12 @@ bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq)
if (!status)
return false;
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL));
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
u32 pll = pll_config_get_mtl(vdev);
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
pll, pll_ratio_to_dpu_freq_mtl(pll) / HZ_PER_MHZ);
}
if (REG_TEST_FLD(VPU_HW_BTRS_MTL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR irq 0x%016llx", REGB_RD64(VPU_HW_BTRS_MTL_ATS_ERR_LOG_0));
@ -633,8 +677,12 @@ bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq)
queue_work(system_wq, &vdev->irq_dct_work);
}
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x", REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ));
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, FREQ_CHANGE, status)) {
u32 pll = pll_config_get_lnl(vdev);
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq, wp %08x, %lu MHz",
pll, pll_ratio_to_dpu_freq_lnl(pll) / HZ_PER_MHZ);
}
if (REG_TEST_FLD(VPU_HW_BTRS_LNL_INTERRUPT_STAT, ATS_ERR, status)) {
ivpu_err(vdev, "ATS_ERR LOG1 0x%08x ATS_ERR_LOG2 0x%08x\n",
@ -717,60 +765,6 @@ void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 acti
REGB_WR32(VPU_HW_BTRS_LNL_PCODE_MAILBOX_STATUS, val);
}
static u32 pll_ratio_to_freq_mtl(u32 ratio, u32 config)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
u32 cpu_clock;
if ((config & 0xff) == MTL_PLL_RATIO_4_3)
cpu_clock = pll_clock * 2 / 4;
else
cpu_clock = pll_clock * 2 / 5;
return cpu_clock;
}
u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
{
struct ivpu_hw_info *hw = vdev->hw;
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
return pll_ratio_to_freq_mtl(ratio, hw->config);
else
return PLL_RATIO_TO_FREQ(ratio);
}
static u32 pll_freq_get_mtl(struct ivpu_device *vdev)
{
u32 pll_curr_ratio;
pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_MTL_CURRENT_PLL);
pll_curr_ratio &= VPU_HW_BTRS_MTL_CURRENT_PLL_RATIO_MASK;
if (!ivpu_is_silicon(vdev))
return PLL_SIMULATION_FREQ;
return pll_ratio_to_freq_mtl(pll_curr_ratio, vdev->hw->config);
}
static u32 pll_freq_get_lnl(struct ivpu_device *vdev)
{
u32 pll_curr_ratio;
pll_curr_ratio = REGB_RD32(VPU_HW_BTRS_LNL_PLL_FREQ);
pll_curr_ratio &= VPU_HW_BTRS_LNL_PLL_FREQ_RATIO_MASK;
return PLL_RATIO_TO_FREQ(pll_curr_ratio);
}
u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)
return pll_freq_get_mtl(vdev);
else
return pll_freq_get_lnl(vdev);
}
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev)
{
if (ivpu_hw_btrs_gen(vdev) == IVPU_HW_BTRS_MTL)

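A worked example of the new conversion helpers, using the constants above (PLL_REF_CLK_FREQ = 50000000ull, so PLL_RATIO_TO_FREQ(x) = x * 50 MHz) and an illustrative workpoint ratio of 30: the PLL runs at 30 * 50 = 1500 MHz, so pll_ratio_to_dpu_freq_mtl() reports 1500 * 2 / 3 = 1000 MHz while pll_ratio_to_dpu_freq_lnl() reports 1500 / 2 = 750 MHz.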
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __IVPU_HW_BTRS_H__
@ -13,7 +13,6 @@
#define PLL_PROFILING_FREQ_DEFAULT 38400000
#define PLL_PROFILING_FREQ_HIGH 400000000
#define PLL_RATIO_TO_FREQ(x) ((x) * PLL_REF_CLK_FREQ)
#define DCT_DEFAULT_ACTIVE_PERCENT 15u
#define DCT_PERIOD_US 35300u
@ -32,12 +31,12 @@ int ivpu_hw_btrs_ip_reset(struct ivpu_device *vdev);
void ivpu_hw_btrs_profiling_freq_reg_set_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_ats_print_lnl(struct ivpu_device *vdev);
void ivpu_hw_btrs_clock_relinquish_disable_lnl(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_dpu_max_freq_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_dpu_freq_get(struct ivpu_device *vdev);
bool ivpu_hw_btrs_irq_handler_mtl(struct ivpu_device *vdev, int irq);
bool ivpu_hw_btrs_irq_handler_lnl(struct ivpu_device *vdev, int irq);
int ivpu_hw_btrs_dct_get_request(struct ivpu_device *vdev, bool *enable);
void ivpu_hw_btrs_dct_set_status(struct ivpu_device *vdev, bool enable, u32 dct_percent);
u32 ivpu_hw_btrs_pll_freq_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_ratio_to_freq(struct ivpu_device *vdev, u32 ratio);
u32 ivpu_hw_btrs_telemetry_offset_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_size_get(struct ivpu_device *vdev);
u32 ivpu_hw_btrs_telemetry_enable_get(struct ivpu_device *vdev);

@ -470,8 +470,8 @@ static void ivpu_job_destroy(struct ivpu_job *job)
struct ivpu_device *vdev = job->vdev;
u32 i;
ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d engine %d",
job->job_id, job->file_priv->ctx.id, job->engine_idx);
ivpu_dbg(vdev, JOB, "Job destroyed: id %3u ctx %2d cmdq_id %u engine %d",
job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx);
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
@ -564,8 +564,8 @@ static int ivpu_job_signal_and_destroy(struct ivpu_device *vdev, u32 job_id, u32
dma_fence_signal(job->done_fence);
trace_job("done", job);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d cmdq_id %u engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->cmdq_id, job->engine_idx, job_status);
ivpu_job_destroy(job);
ivpu_stop_job_timeout_detection(vdev);
@ -664,8 +664,8 @@ static int ivpu_job_submit(struct ivpu_job *job, u8 priority, u32 cmdq_id)
}
trace_job("submit", job);
ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d engine %d prio %d addr 0x%llx next %d\n",
job->job_id, file_priv->ctx.id, job->engine_idx, cmdq->priority,
ivpu_dbg(vdev, JOB, "Job submitted: id %3u ctx %2d cmdq_id %u engine %d prio %d addr 0x%llx next %d\n",
job->job_id, file_priv->ctx.id, cmdq->id, job->engine_idx, cmdq->priority,
job->cmd_buf_vpu_addr, cmdq->jobq->header.tail);
mutex_unlock(&file_priv->lock);
@ -777,7 +777,8 @@ static int ivpu_submit(struct drm_file *file, struct ivpu_file_priv *file_priv,
goto err_free_handles;
}
ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u buf_count %u\n", file_priv->ctx.id, buffer_count);
ivpu_dbg(vdev, JOB, "Submit ioctl: ctx %u cmdq_id %u buf_count %u\n",
file_priv->ctx.id, cmdq_id, buffer_count);
job = ivpu_job_create(file_priv, engine, buffer_count);
if (!job) {

@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*/
#include <linux/device.h>
#include <linux/err.h>
#include <linux/pm_runtime.h>
#include <linux/units.h>
#include "ivpu_drv.h"
#include "ivpu_gem.h"
@ -90,10 +92,55 @@ sched_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
static DEVICE_ATTR_RO(sched_mode);
/**
* DOC: npu_max_frequency
*
* The npu_max_frequency shows maximum frequency in MHz of the NPU's data
* processing unit
*/
static ssize_t
npu_max_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
u32 freq = ivpu_hw_dpu_max_freq_get(vdev);
return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
}
static DEVICE_ATTR_RO(npu_max_frequency_mhz);
/**
* DOC: npu_current_frequency_mhz
*
* The npu_current_frequency_mhz shows current frequency in MHz of the NPU's
* data processing unit
*/
static ssize_t
npu_current_frequency_mhz_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
u32 freq = 0;
/* Read frequency only if device is active, otherwise frequency is 0 */
if (pm_runtime_get_if_active(vdev->drm.dev) > 0) {
freq = ivpu_hw_dpu_freq_get(vdev);
pm_runtime_put_autosuspend(vdev->drm.dev);
}
return sysfs_emit(buf, "%lu\n", freq / HZ_PER_MHZ);
}
static DEVICE_ATTR_RO(npu_current_frequency_mhz);
static struct attribute *ivpu_dev_attrs[] = {
&dev_attr_npu_busy_time_us.attr,
&dev_attr_npu_memory_utilization.attr,
&dev_attr_sched_mode.attr,
&dev_attr_npu_max_frequency_mhz.attr,
&dev_attr_npu_current_frequency_mhz.attr,
NULL,
};

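The current-frequency attribute leans on the semantics of pm_runtime_get_if_active(): unlike pm_runtime_get_sync(), it never wakes a suspended device, it only takes a usage-count reference when the device is already active. The resulting pattern (read_hw_freq() is a stand-in for ivpu_hw_dpu_freq_get()):

	u32 freq = 0;	/* reported while the device sleeps */

	if (pm_runtime_get_if_active(dev) > 0) {
		freq = read_hw_freq();	/* device cannot suspend under us */
		pm_runtime_put_autosuspend(dev);
	}

so a sysfs read never powers the NPU up just to sample its clock.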
@ -26,7 +26,7 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
#define VPU_BOOT_API_VER_MINOR 26
#define VPU_BOOT_API_VER_MINOR 28
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@ -76,8 +76,15 @@ struct vpu_firmware_header {
* submission queue size and device capabilities.
*/
u32 preemption_buffer_2_size;
/*
* Maximum preemption buffer size that the FW can use: no need for the host
* driver to allocate more space than that specified by these fields.
* A value of 0 means no declared limit.
*/
u32 preemption_buffer_1_max_size;
u32 preemption_buffer_2_max_size;
/* Space reserved for future preemption-related fields. */
u32 preemption_reserved[6];
u32 preemption_reserved[4];
/* FW image read only section start address, 4KB aligned */
u64 ro_section_start_address;
/* FW image read only section size, 4KB aligned */
@ -134,7 +141,7 @@ enum vpu_trace_destination {
/*
* Processor bit shifts (for loggable HW components).
*/
#define VPU_TRACE_PROC_BIT_ARM 0
#define VPU_TRACE_PROC_BIT_RESERVED 0
#define VPU_TRACE_PROC_BIT_LRT 1
#define VPU_TRACE_PROC_BIT_LNN 2
#define VPU_TRACE_PROC_BIT_SHV_0 3

@ -22,7 +22,7 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
#define VPU_JSM_API_VER_MINOR 25
#define VPU_JSM_API_VER_MINOR 29
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
@ -53,8 +53,7 @@
* Engine indexes.
*/
#define VPU_ENGINE_COMPUTE 0
#define VPU_ENGINE_COPY 1
#define VPU_ENGINE_NB 2
#define VPU_ENGINE_NB 1
/*
* VPU status values.
@ -126,11 +125,13 @@ enum {
* When set, indicates that job queue uses native fences (as inline commands
* in job queue). Such queues may also use legacy fences (as commands in batch buffers).
* When cleared, indicates the job queue only uses legacy fences.
* NOTE: For queues using native fences, VPU expects that all jobs in the queue
* are immediately followed by an inline command object. This object is expected
* to be a fence signal command in most cases, but can also be a NOP in case the host
* does not need per-job fence signalling. Other inline commands objects can be
* inserted between "job and inline command" pairs.
* NOTES:
* 1. For queues using native fences, VPU expects that all jobs in the queue
* are immediately followed by an inline command object. This object is expected
* to be a fence signal command in most cases, but can also be a NOP in case the host
* does not need per-job fence signalling. Other inline commands objects can be
* inserted between "job and inline command" pairs.
* 2. Native fence queues are only supported on VPU 40xx onwards.
*/
VPU_JOB_QUEUE_FLAGS_USE_NATIVE_FENCE_MASK = (1 << 1U),
@ -275,6 +276,8 @@ struct vpu_inline_cmd {
u64 value;
/* User VA of the log buffer in which to add log entry on completion. */
u64 log_buffer_va;
/* NPU private data. */
u64 npu_private_data;
} fence;
/* Other commands do not have a payload. */
/* Payload definition for future inline commands can be inserted here. */
@ -791,12 +794,22 @@ struct vpu_jsm_metric_streamer_update {
/** Metric group mask that identifies metric streamer instance. */
u64 metric_group_mask;
/**
* Address and size of the buffer where the VPU will write metric data. If
* the buffer address is 0 or same as the currently used buffer the VPU will
* continue writing metric data to the current buffer. In this case the
* buffer size is ignored and the size of the current buffer is unchanged.
* If the address is non-zero and differs from the current buffer address the
* VPU will immediately switch data collection to the new buffer.
* Address and size of the buffer where the VPU will write metric data.
* This member dictates how the update operation should perform:
* 1. client needs information about the number of collected samples and the
* amount of data written to the current buffer
* 2. client wants to switch to a new buffer
*
* Case 1. is identified by the buffer address being 0 or the same as the
* currently used buffer address. In this case the buffer size is ignored and
* the size of the current buffer is unchanged. The VPU will return an update
* in the vpu_jsm_metric_streamer_done structure. The internal writing position
* into the buffer is not changed.
*
* Case 2. is identified by the address being non-zero and differs from the
* current buffer address. The VPU will immediately switch data collection to
* the new buffer. Then the VPU will return an update in the
* vpu_jsm_metric_streamer_done structure.
*/
u64 buffer_addr;
u64 buffer_size;
@ -934,6 +947,7 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
/*
* Default quantum in 100ns units for scheduling across processes
* within a priority band
* Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
@ -946,8 +960,10 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* in situations when it's starved by the focus band.
*/
u32 normal_band_percentage;
/* Reserved */
u32 reserved_0;
/*
* TDR timeout value in milliseconds. Default value of 0 meaning no timeout.
*/
u32 tdr_timeout;
};
/*
@ -1024,7 +1040,10 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
s32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
/* Context quantum relative to other contexts of same priority in the same process */
/*
* Context quantum relative to other contexts of same priority in the same process
* Minimum value supported by NPU is 1ms (10000 in 100ns units).
*/
u64 context_quantum;
/* Grace period when preempting context of the same priority within the same process */
u64 grace_period_same_priority;

@ -438,15 +438,17 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a
return -EINVAL;
pt = dma_fence_to_sync_pt(fence);
if (!pt)
return -EINVAL;
if (!pt) {
ret = -EINVAL;
goto put_fence;
}
spin_lock_irqsave(fence->lock, flags);
if (test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
data.deadline_ns = ktime_to_ns(pt->deadline);
} else {
if (!test_bit(SW_SYNC_HAS_DEADLINE_BIT, &fence->flags)) {
ret = -ENOENT;
goto unlock;
}
data.deadline_ns = ktime_to_ns(pt->deadline);
spin_unlock_irqrestore(fence->lock, flags);
dma_fence_put(fence);
@ -458,6 +460,13 @@ static int sw_sync_ioctl_get_deadline(struct sync_timeline *obj, unsigned long a
return -EFAULT;
return 0;
unlock:
spin_unlock_irqrestore(fence->lock, flags);
put_fence:
dma_fence_put(fence);
return ret;
}
static long sw_sync_ioctl(struct file *file, unsigned int cmd,

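The sw_sync fix converts bare early returns into the usual goto-unwind shape so the reference taken on the fence at lookup is dropped on every path. The general pattern (the check and labels here are generic, not the exact driver code):

	struct dma_fence *fence = sync_file_get_fence(fd);	/* +1 reference */
	int ret = 0;

	if (!fence)
		return -EINVAL;		/* nothing acquired yet */

	if (!fence_has_deadline(fence)) {	/* hypothetical check */
		ret = -ENOENT;
		goto put_fence;		/* a bare return here leaked the ref */
	}

	/* ... read out the deadline ... */
put_fence:
	dma_fence_put(fence);
	return ret;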
@ -1123,6 +1123,7 @@ struct amdgpu_device {
bool in_s3;
bool in_s4;
bool in_s0ix;
suspend_state_t last_suspend_state;
enum pp_mp1_state mp1_state;
struct amdgpu_doorbell_index doorbell_index;

@ -3510,6 +3510,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
amdgpu_device_mem_scratch_fini(adev);
amdgpu_ib_pool_fini(adev);
amdgpu_seq64_fini(adev);
amdgpu_doorbell_fini(adev);
}
if (adev->ip_blocks[i].version->funcs->sw_fini) {
r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
@ -4858,7 +4859,6 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_doorbell_fini(adev);
drm_dev_exit(idx);
}

@ -2548,8 +2548,20 @@ static int amdgpu_pmops_suspend(struct device *dev)
adev->in_s0ix = true;
else if (amdgpu_acpi_is_s3_active(adev))
adev->in_s3 = true;
if (!adev->in_s0ix && !adev->in_s3)
if (!adev->in_s0ix && !adev->in_s3) {
/* don't allow going deep first time followed by s2idle the next time */
if (adev->last_suspend_state != PM_SUSPEND_ON &&
adev->last_suspend_state != pm_suspend_target_state) {
drm_err_once(drm_dev, "Unsupported suspend state %d\n",
pm_suspend_target_state);
return -EINVAL;
}
return 0;
}
/* cache the state last used for suspend */
adev->last_suspend_state = pm_suspend_target_state;
return amdgpu_device_suspend(drm_dev, true);
}

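A concrete sequence the new check rejects (using standard suspend_state_t values): a first cycle that suspends to deep caches last_suspend_state = PM_SUSPEND_MEM; if a later s2idle attempt qualifies as neither s0ix nor S3, it reaches the check with pm_suspend_target_state == PM_SUSPEND_TO_IDLE, which differs from the cached non-PM_SUSPEND_ON value, so amdgpu_pmops_suspend() returns -EINVAL instead of mixing suspend flavors across cycles.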
@ -1438,9 +1438,11 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
struct drm_gpu_scheduler *sched = &ring->sched;
struct drm_sched_entity entity;
static atomic_t counter;
struct dma_fence *f;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
void *owner;
int i, r;
/* Initialize the scheduler entity */
@ -1451,9 +1453,15 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
goto err;
}
r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
64, 0,
&job);
/*
* Use some unique dummy value as the owner to make sure we execute
* the cleaner shader on each submission. The value just need to change
* for each submission and is otherwise meaningless.
*/
owner = (void *)(unsigned long)atomic_inc_return(&counter);
r = amdgpu_job_alloc_with_ib(ring->adev, &entity, owner,
64, 0, &job);
if (r)
goto err;

@ -6114,7 +6114,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@ -6192,7 +6192,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0);
@ -6269,7 +6269,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@ -6644,7 +6644,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);

@ -2428,7 +2428,7 @@ static int gfx_v11_0_config_me_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_ME_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0);
@ -2472,7 +2472,7 @@ static int gfx_v11_0_config_pfp_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0);
@ -2517,7 +2517,7 @@ static int gfx_v11_0_config_mec_cache(struct amdgpu_device *adev, uint64_t addr)
}
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
tmp = RREG32_SOC15(GC, 0, regCP_CPC_IC_BASE_CNTL);
tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
@ -3153,7 +3153,7 @@ static int gfx_v11_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@ -3371,7 +3371,7 @@ static int gfx_v11_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@ -4541,7 +4541,7 @@ static int gfx_v11_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;

@ -2324,7 +2324,7 @@ static int gfx_v12_0_cp_gfx_load_pfp_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.pfp.pfp_fw_data_obj);
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_PFP_IC_BASE_LO,
lower_32_bits(adev->gfx.pfp.pfp_fw_gpu_addr));
@ -2468,7 +2468,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->gfx.me.me_fw_data_obj);
if (amdgpu_emu_mode == 1)
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
WREG32_SOC15(GC, 0, regCP_ME_IC_BASE_LO,
lower_32_bits(adev->gfx.me.me_fw_gpu_addr));
@ -3426,7 +3426,7 @@ static int gfx_v12_0_gfxhub_enable(struct amdgpu_device *adev)
if (r)
return r;
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;

@ -268,7 +268,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@ -969,7 +969,7 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev)
adev->hdp.funcs->init_registers(adev);
/* Flush HDP after it is initialized */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;

@ -229,7 +229,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@ -899,7 +899,7 @@ static int gmc_v11_0_gart_enable(struct amdgpu_device *adev)
return r;
/* Flush HDP after it is initialized */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;

@ -297,7 +297,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
return;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
/* This is necessary for SRIOV as well as for GFXOFF to function
* properly under bare metal
@ -881,7 +881,7 @@ static int gmc_v12_0_gart_enable(struct amdgpu_device *adev)
return r;
/* Flush HDP after it is initialized */
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ?
false : true;

@ -2435,7 +2435,7 @@ static int gmc_v9_0_hw_init(struct amdgpu_ip_block *ip_block)
adev->hdp.funcs->init_registers(adev);
/* After HDP is initialized, flush HDP.*/
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;

@ -533,7 +533,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {

@ -610,7 +610,7 @@ static int psp_v13_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {

@ -498,7 +498,7 @@ static int psp_v14_0_memory_training(struct psp_context *psp, uint32_t ops)
}
memcpy_toio(adev->mman.aper_base_kaddr, buf, sz);
adev->hdp.funcs->flush_hdp(adev, NULL);
amdgpu_device_flush_hdp(adev, NULL);
vfree(buf);
drm_dev_exit(idx);
} else {

@ -239,6 +239,13 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] =
.max_pixels_per_frame = 4096 * 4096,
.max_level = 186,
},
{
.codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG,
.max_width = 4096,
.max_height = 4096,
.max_pixels_per_frame = 4096 * 4096,
.max_level = 0,
},
};
static const struct amdgpu_video_codecs cz_video_codecs_decode =

@ -2,6 +2,7 @@
//
// Copyright 2024 Advanced Micro Devices, Inc.
#include <linux/vmalloc.h>
#include "dml2_internal_types.h"
#include "dml_top.h"
@ -13,11 +14,11 @@
static bool dml21_allocate_memory(struct dml2_context **dml_ctx)
{
*dml_ctx = kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
*dml_ctx = vzalloc(sizeof(struct dml2_context));
if (!(*dml_ctx))
return false;
(*dml_ctx)->v21.dml_init.dml2_instance = kzalloc(sizeof(struct dml2_instance), GFP_KERNEL);
(*dml_ctx)->v21.dml_init.dml2_instance = vzalloc(sizeof(struct dml2_instance));
if (!((*dml_ctx)->v21.dml_init.dml2_instance))
return false;
@ -27,7 +28,7 @@ static bool dml21_allocate_memory(struct dml2_context **dml_ctx)
(*dml_ctx)->v21.mode_support.display_config = &(*dml_ctx)->v21.display_config;
(*dml_ctx)->v21.mode_programming.display_config = (*dml_ctx)->v21.mode_support.display_config;
(*dml_ctx)->v21.mode_programming.programming = kzalloc(sizeof(struct dml2_display_cfg_programming), GFP_KERNEL);
(*dml_ctx)->v21.mode_programming.programming = vzalloc(sizeof(struct dml2_display_cfg_programming));
if (!((*dml_ctx)->v21.mode_programming.programming))
return false;
@ -115,8 +116,8 @@ bool dml21_create(const struct dc *in_dc, struct dml2_context **dml_ctx, const s
void dml21_destroy(struct dml2_context *dml2)
{
kfree(dml2->v21.dml_init.dml2_instance);
kfree(dml2->v21.mode_programming.programming);
vfree(dml2->v21.dml_init.dml2_instance);
vfree(dml2->v21.mode_programming.programming);
}
static void dml21_calculate_rq_and_dlg_params(const struct dc *dc, struct dc_state *context, struct resource_context *out_new_hw_state,

@ -24,6 +24,8 @@
*
*/
#include <linux/vmalloc.h>
#include "display_mode_core.h"
#include "dml2_internal_types.h"
#include "dml2_utils.h"
@ -747,7 +749,7 @@ bool dml2_validate(const struct dc *in_dc, struct dc_state *context, struct dml2
static inline struct dml2_context *dml2_allocate_memory(void)
{
return (struct dml2_context *) kzalloc(sizeof(struct dml2_context), GFP_KERNEL);
return (struct dml2_context *) vzalloc(sizeof(struct dml2_context));
}
static void dml2_init(const struct dc *in_dc, const struct dml2_configuration_options *config, struct dml2_context **dml2)
@ -821,7 +823,7 @@ void dml2_destroy(struct dml2_context *dml2)
if (dml2->architecture == dml2_architecture_21)
dml21_destroy(dml2);
kfree(dml2);
vfree(dml2);
}
void dml2_extract_dram_and_fclk_change_support(struct dml2_context *dml2,

@ -161,6 +161,7 @@ struct intel_display_platforms {
#define HAS_DPT(__display) (DISPLAY_VER(__display) >= 13)
#define HAS_DSB(__display) (DISPLAY_INFO(__display)->has_dsb)
#define HAS_DSC(__display) (DISPLAY_RUNTIME_INFO(__display)->has_dsc)
#define HAS_DSC_3ENGINES(__display) (DISPLAY_VERx100(__display) == 1401 && HAS_DSC(__display))
#define HAS_DSC_MST(__display) (DISPLAY_VER(__display) >= 12 && HAS_DSC(__display))
#define HAS_FBC(__display) (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0)
#define HAS_FBC_DIRTY_RECT(__display) (DISPLAY_VER(__display) >= 30)

@ -1050,10 +1050,11 @@ u8 intel_dp_dsc_get_slice_count(const struct intel_connector *connector,
u8 test_slice_count = valid_dsc_slicecount[i] * num_joined_pipes;
/*
* 3 DSC Slices per pipe need 3 DSC engines,
* which is supported only with Ultrajoiner.
* 3 DSC Slices per pipe need 3 DSC engines, which is supported only
* with Ultrajoiner only for some platforms.
*/
if (valid_dsc_slicecount[i] == 3 && num_joined_pipes != 4)
if (valid_dsc_slicecount[i] == 3 &&
(!HAS_DSC_3ENGINES(display) || num_joined_pipes != 4))
continue;
if (test_slice_count >

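Tracing the new condition with concrete values: on a display-14.01 platform with DSC, HAS_DSC_3ENGINES() is true, so valid_dsc_slicecount[i] == 3 survives only when num_joined_pipes == 4 (ultrajoiner); on any other display version HAS_DSC_3ENGINES() is false and the 3-slices-per-pipe option is skipped regardless of the joiner configuration.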
@ -223,7 +223,7 @@ void mgag200_set_mode_regs(struct mga_device *mdev, const struct drm_display_mod
vsyncstr = mode->crtc_vsync_start - 1;
vsyncend = mode->crtc_vsync_end - 1;
vtotal = mode->crtc_vtotal - 2;
vblkstr = mode->crtc_vblank_start;
vblkstr = mode->crtc_vblank_start - 1;
vblkend = vtotal + 1;
linecomp = vdispend;

@ -242,10 +242,10 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++;
break;
}
@ -377,10 +377,10 @@ static void a7xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
break;
fallthrough;
case MSM_SUBMIT_CMD_BUF:
OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
OUT_RING(ring, submit->cmd[i].size);
OUT_RING(ring, A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(submit->cmd[i].size));
ibs++;
break;
}

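The reason the size now goes through the generated helper: IB_SIZE is a 20-bit field (low="0" high="19" in the registers-XML hunk below), so writing submit->cmd[i].size into the packet dword raw can set bits outside the field. Assuming the usual rnndb shift-and-mask encoding, the helper behaves like:

	/* Hedged sketch of the generated helper, not the real header. */
	#define A5XX_CP_INDIRECT_BUFFER_2_IB_SIZE(v)	((v) & 0xfffff)

so an oversized value is truncated to the field instead of corrupting adjacent packet bits.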
@ -132,7 +132,6 @@ static const struct dpu_intf_cfg msm8937_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_2", .id = INTF_2,
.base = 0x6b000, .len = 0x268,
@ -141,7 +140,6 @@ static const struct dpu_intf_cfg msm8937_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
.intr_tear_rd_ptr = -1,
},
};

@ -118,7 +118,6 @@ static const struct dpu_intf_cfg msm8917_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
},
};

@ -131,7 +131,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_1", .id = INTF_1,
.base = 0x6a800, .len = 0x268,
@ -140,7 +139,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_2", .id = INTF_2,
.base = 0x6b000, .len = 0x268,
@ -149,7 +147,6 @@ static const struct dpu_intf_cfg msm8953_intf[] = {
.prog_fetch_lines_worst_case = 14,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
.intr_tear_rd_ptr = -1,
},
};

@ -241,7 +241,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = {
.prog_fetch_lines_worst_case = 25,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_1", .id = INTF_1,
.base = 0x6a800, .len = 0x268,
@ -250,7 +249,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = {
.prog_fetch_lines_worst_case = 25,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_2", .id = INTF_2,
.base = 0x6b000, .len = 0x268,
@ -259,7 +257,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = {
.prog_fetch_lines_worst_case = 25,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_3", .id = INTF_3,
.base = 0x6b800, .len = 0x268,
@ -267,7 +264,6 @@ static const struct dpu_intf_cfg msm8996_intf[] = {
.prog_fetch_lines_worst_case = 25,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
.intr_tear_rd_ptr = -1,
},
};

@ -202,7 +202,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = {
.prog_fetch_lines_worst_case = 21,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_1", .id = INTF_1,
.base = 0x6a800, .len = 0x280,
@ -211,7 +210,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = {
.prog_fetch_lines_worst_case = 21,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_2", .id = INTF_2,
.base = 0x6b000, .len = 0x280,
@ -220,7 +218,6 @@ static const struct dpu_intf_cfg sdm660_intf[] = {
.prog_fetch_lines_worst_case = 21,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 28),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 29),
.intr_tear_rd_ptr = -1,
},
};

@ -147,7 +147,6 @@ static const struct dpu_intf_cfg sdm630_intf[] = {
.prog_fetch_lines_worst_case = 21,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 24),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 25),
.intr_tear_rd_ptr = -1,
}, {
.name = "intf_1", .id = INTF_1,
.base = 0x6a800, .len = 0x280,
@ -156,7 +155,6 @@ static const struct dpu_intf_cfg sdm630_intf[] = {
.prog_fetch_lines_worst_case = 21,
.intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 26),
.intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 27),
.intr_tear_rd_ptr = -1,
},
};

@ -1666,7 +1666,7 @@ static void _dpu_encoder_trigger_flush(struct drm_encoder *drm_enc,
*/
static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys)
{
struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(phys->parent);
struct dpu_encoder_virt *dpu_enc;
if (!phys) {
DPU_ERROR("invalid argument(s)\n");
@ -1678,6 +1678,8 @@ static void _dpu_encoder_trigger_start(struct dpu_encoder_phys *phys)
return;
}
dpu_enc = to_dpu_encoder_virt(phys->parent);
if (phys->parent->encoder_type == DRM_MODE_ENCODER_VIRTUAL &&
dpu_enc->cwb_mask) {
DPU_DEBUG("encoder %d CWB enabled, skipping\n", DRMID(phys->parent));

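The bug fixed above is the classic dereference-before-NULL-check pattern, in miniature:

	struct dpu_encoder_virt *dpu_enc = to_dpu_encoder_virt(phys->parent);	/* derefs phys */

	if (!phys)	/* too late: the initializer already dereferenced it */
		return;

Moving the to_dpu_encoder_virt() call below the check, as the hunk does, makes the validation meaningful again.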
@ -729,12 +729,40 @@ static int dpu_plane_check_inline_rotation(struct dpu_plane *pdpu,
static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu,
struct dpu_sw_pipe *pipe,
struct dpu_sw_pipe_cfg *pipe_cfg,
const struct msm_format *fmt,
const struct drm_display_mode *mode)
const struct drm_display_mode *mode,
struct drm_plane_state *new_plane_state)
{
uint32_t min_src_size;
struct dpu_kms *kms = _dpu_plane_get_kms(&pdpu->base);
int ret;
const struct msm_format *fmt;
uint32_t supported_rotations;
const struct dpu_sspp_cfg *pipe_hw_caps;
const struct dpu_sspp_sub_blks *sblk;
pipe_hw_caps = pipe->sspp->cap;
sblk = pipe->sspp->cap->sblk;
/*
* We already have verified scaling against platform limitations.
* Now check if the SSPP supports scaling at all.
*/
if (!sblk->scaler_blk.len &&
((drm_rect_width(&new_plane_state->src) >> 16 !=
drm_rect_width(&new_plane_state->dst)) ||
(drm_rect_height(&new_plane_state->src) >> 16 !=
drm_rect_height(&new_plane_state->dst))))
return -ERANGE;
fmt = msm_framebuffer_format(new_plane_state->fb);
supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0;
if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION))
supported_rotations |= DRM_MODE_ROTATE_90;
pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation,
supported_rotations);
min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1;
@ -923,47 +951,20 @@ static int dpu_plane_atomic_check_sspp(struct drm_plane *plane,
struct dpu_plane_state *pstate = to_dpu_plane_state(new_plane_state);
struct dpu_sw_pipe *pipe = &pstate->pipe;
struct dpu_sw_pipe *r_pipe = &pstate->r_pipe;
const struct msm_format *fmt;
struct dpu_sw_pipe_cfg *pipe_cfg = &pstate->pipe_cfg;
struct dpu_sw_pipe_cfg *r_pipe_cfg = &pstate->r_pipe_cfg;
uint32_t supported_rotations;
const struct dpu_sspp_cfg *pipe_hw_caps;
const struct dpu_sspp_sub_blks *sblk;
int ret = 0;
pipe_hw_caps = pipe->sspp->cap;
sblk = pipe->sspp->cap->sblk;
/*
* We already have verified scaling against platform limitations.
* Now check if the SSPP supports scaling at all.
*/
if (!sblk->scaler_blk.len &&
((drm_rect_width(&new_plane_state->src) >> 16 !=
drm_rect_width(&new_plane_state->dst)) ||
(drm_rect_height(&new_plane_state->src) >> 16 !=
drm_rect_height(&new_plane_state->dst))))
return -ERANGE;
fmt = msm_framebuffer_format(new_plane_state->fb);
supported_rotations = DRM_MODE_REFLECT_MASK | DRM_MODE_ROTATE_0;
if (pipe_hw_caps->features & BIT(DPU_SSPP_INLINE_ROTATION))
supported_rotations |= DRM_MODE_ROTATE_90;
pipe_cfg->rotation = drm_rotation_simplify(new_plane_state->rotation,
supported_rotations);
r_pipe_cfg->rotation = pipe_cfg->rotation;
ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt,
&crtc_state->adjusted_mode);
ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg,
&crtc_state->adjusted_mode,
new_plane_state);
if (ret)
return ret;
if (drm_rect_width(&r_pipe_cfg->src_rect) != 0) {
ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg, fmt,
&crtc_state->adjusted_mode);
ret = dpu_plane_atomic_check_pipe(pdpu, r_pipe, r_pipe_cfg,
&crtc_state->adjusted_mode,
new_plane_state);
if (ret)
return ret;
}
@ -1059,6 +1060,9 @@ static int dpu_plane_virtual_atomic_check(struct drm_plane *plane,
struct drm_crtc_state *crtc_state;
int ret;
if (IS_ERR(plane_state))
return PTR_ERR(plane_state);
if (plane_state->crtc)
crtc_state = drm_atomic_get_new_crtc_state(state,
plane_state->crtc);

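The added guard follows the kernel ERR_PTR convention: atomic-state lookups like drm_atomic_get_plane_state() (presumably where plane_state comes from here) return an encoded error pointer rather than NULL on failure, so the correct test is:

	if (IS_ERR(plane_state))
		return PTR_ERR(plane_state);	/* e.g. -EDEADLK to back off and retry */

A NULL check on that path would never fire.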
@ -2259,5 +2259,12 @@ opcode: CP_LOAD_STATE4 (30) (4 dwords)
</reg32>
</domain>
<domain name="CP_INDIRECT_BUFFER" width="32" varset="chip" prefix="chip" variants="A5XX-">
<reg64 offset="0" name="IB_BASE" type="address"/>
<reg32 offset="2" name="2">
<bitfield name="IB_SIZE" low="0" high="19"/>
</reg32>
</domain>
</database>

@ -428,7 +428,8 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
struct v3d_bo *bo = to_v3d_bo(job->base.bo[0]);
struct v3d_bo *indirect = to_v3d_bo(indirect_csd->indirect);
struct drm_v3d_submit_csd *args = &indirect_csd->job->args;
u32 *wg_counts;
struct v3d_dev *v3d = job->base.v3d;
u32 num_batches, *wg_counts;
v3d_get_bo_vaddr(bo);
v3d_get_bo_vaddr(indirect);
@ -441,8 +442,17 @@ v3d_rewrite_csd_job_wg_counts_from_indirect(struct v3d_cpu_job *job)
args->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT;
args->cfg[4] = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
(wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1;
num_batches = DIV_ROUND_UP(indirect_csd->wg_size, 16) *
(wg_counts[0] * wg_counts[1] * wg_counts[2]);
/* V3D 7.1.6 and later don't subtract 1 from the number of batches */
if (v3d->ver < 71 || (v3d->ver == 71 && v3d->rev < 6))
args->cfg[4] = num_batches - 1;
else
args->cfg[4] = num_batches;
WARN_ON(args->cfg[4] == ~0);
for (int i = 0; i < 3; i++) {
/* 0xffffffff indicates that the uniform rewrite is not needed */

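Worked numbers for the batch calculation (values are illustrative): wg_size = 64 gives DIV_ROUND_UP(64, 16) = 4 batches per workgroup, and an indirect buffer holding wg_counts = {2, 2, 2} dispatches 8 workgroups, so num_batches = 32. Hardware older than V3D 7.1.6 is then programmed with cfg[4] = 31, while V3D 7.1.6 and newer take cfg[4] = 32.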
@ -145,10 +145,7 @@ static void xe_dma_buf_unmap(struct dma_buf_attachment *attach,
struct sg_table *sgt,
enum dma_data_direction dir)
{
struct dma_buf *dma_buf = attach->dmabuf;
struct xe_bo *bo = gem_to_xe_bo(dma_buf->priv);
if (!xe_bo_is_vram(bo)) {
if (sg_page(sgt->sgl)) {
dma_unmap_sgtable(attach->dev, sgt, dir, 0);
sg_free_table(sgt);
kfree(sgt);

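What the rewritten test distinguishes (a hedged reading of the fix):

	/* system-memory mapping: sgt built from struct pages -> sg_page() != NULL
	 * VRAM mapping:          sgt carries raw DMA addrs   -> sg_page() == NULL */

Unlike the old xe_bo_is_vram(bo) check, the sg_table records how the buffer was actually mapped, so the unmap path cannot race with the BO being evicted or moved in the meantime.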
@ -490,24 +490,52 @@ static void fill_engine_enable_masks(struct xe_gt *gt,
engine_enable_mask(gt, XE_ENGINE_CLASS_OTHER));
}
static void guc_prep_golden_lrc_null(struct xe_guc_ads *ads)
/*
* Write the offsets corresponding to the golden LRCs. The actual data is
* populated later by guc_golden_lrc_populate()
*/
static void guc_golden_lrc_init(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
u8 guc_class;
size_t alloc_size, real_size;
u32 addr_ggtt, offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
guc_class = xe_engine_class_to_guc_class(class);
for (guc_class = 0; guc_class <= GUC_MAX_ENGINE_CLASSES; ++guc_class) {
if (!info_map_read(xe, &info_map,
engine_enabled_masks[guc_class]))
continue;
real_size = xe_gt_lrc_size(gt, class);
alloc_size = PAGE_ALIGN(real_size);
/*
* This interface is slightly confusing. We need to pass the
* base address of the full golden context and the size of just
* the engine state, which is the section of the context image
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
* We calculate the engine state size by removing the size of
* what comes before it in the context image (which is identical
* on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
guc_ads_golden_lrc_size(ads) -
xe_lrc_skip_size(xe));
real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
xe_bo_ggtt_addr(ads->bo) +
guc_ads_golden_lrc_offset(ads));
addr_ggtt);
addr_ggtt += alloc_size;
}
}
@ -857,7 +885,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
guc_policies_init(ads);
guc_prep_golden_lrc_null(ads);
guc_golden_lrc_init(ads);
guc_mapping_table_init_invalid(gt, &info_map);
guc_doorbell_init(ads);
@ -883,7 +911,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_policies_init(ads);
fill_engine_enable_masks(gt, &info_map);
guc_mmio_reg_state_init(ads);
guc_prep_golden_lrc_null(ads);
guc_golden_lrc_init(ads);
guc_mapping_table_init(gt, &info_map);
guc_capture_prep_lists(ads);
guc_doorbell_init(ads);
@ -903,18 +931,22 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
guc_ads_private_data_offset(ads));
}
static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
/*
* After the golden LRC's are recorded for each engine class by the first
* submission, copy them to the ADS, as initialized earlier by
* guc_golden_lrc_init().
*/
static void guc_golden_lrc_populate(struct xe_guc_ads *ads)
{
struct xe_device *xe = ads_to_xe(ads);
struct xe_gt *gt = ads_to_gt(ads);
struct iosys_map info_map = IOSYS_MAP_INIT_OFFSET(ads_to_map(ads),
offsetof(struct __guc_ads_blob, system_info));
size_t total_size = 0, alloc_size, real_size;
u32 addr_ggtt, offset;
u32 offset;
int class;
offset = guc_ads_golden_lrc_offset(ads);
addr_ggtt = xe_bo_ggtt_addr(ads->bo) + offset;
for (class = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
u8 guc_class;
@ -931,26 +963,9 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
alloc_size = PAGE_ALIGN(real_size);
total_size += alloc_size;
/*
* This interface is slightly confusing. We need to pass the
* base address of the full golden context and the size of just
* the engine state, which is the section of the context image
* that starts after the execlists LRC registers. This is
* required to allow the GuC to restore just the engine state
* when a watchdog reset occurs.
* We calculate the engine state size by removing the size of
* what comes before it in the context image (which is identical
* on all engines).
*/
ads_blob_write(ads, ads.eng_state_size[guc_class],
real_size - xe_lrc_skip_size(xe));
ads_blob_write(ads, ads.golden_context_lrca[guc_class],
addr_ggtt);
xe_map_memcpy_to(xe, ads_to_map(ads), offset,
gt->default_lrc[class], real_size);
addr_ggtt += alloc_size;
offset += alloc_size;
}
@ -959,7 +974,7 @@ static void guc_populate_golden_lrc(struct xe_guc_ads *ads)
void xe_guc_ads_populate_post_load(struct xe_guc_ads *ads)
{
guc_populate_golden_lrc(ads);
guc_golden_lrc_populate(ads);
}
static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_offset)

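The refactor splits the old single pass into two phases; the resulting call flow, summarized from the hunks above:

	xe_guc_ads_populate()           -> guc_golden_lrc_init()
		/* writes golden_context_lrca GGTT addresses and engine
		 * state sizes, so the GuC sees valid offsets at load */
	xe_guc_ads_populate_post_load() -> guc_golden_lrc_populate()
		/* copies the default LRC images recorded at first
		 * submission into the space those addresses point at */

This is the "LRC address being written too late for GuC" fix from the pull summary.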
@ -19,29 +19,6 @@ static u64 xe_npages_in_range(unsigned long start, unsigned long end)
return (end - start) >> PAGE_SHIFT;
}
/**
* xe_mark_range_accessed() - mark a range is accessed, so core mm
* have such information for memory eviction or write back to
* hard disk
* @range: the range to mark
* @write: if write to this range, we mark pages in this range
* as dirty
*/
static void xe_mark_range_accessed(struct hmm_range *range, bool write)
{
struct page *page;
u64 i, npages;
npages = xe_npages_in_range(range->start, range->end);
for (i = 0; i < npages; i++) {
page = hmm_pfn_to_page(range->hmm_pfns[i]);
if (write)
set_page_dirty_lock(page);
mark_page_accessed(page);
}
}
static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
@ -331,7 +308,6 @@ int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
if (ret)
goto out_unlock;
xe_mark_range_accessed(&hmm_range, write);
userptr->sg = &userptr->sgt;
xe_hmm_userptr_set_mapped(uvma);
userptr->notifier_seq = hmm_range.notifier_seq;

@ -66,9 +66,18 @@ static int pxp_terminate(struct seq_file *m, void *data)
{
struct xe_pxp *pxp = node_to_pxp(m->private);
struct drm_printer p = drm_seq_file_printer(m);
int ready = xe_pxp_get_readiness_status(pxp);
if (!xe_pxp_is_enabled(pxp))
return -ENODEV;
if (ready < 0)
return ready; /* disabled or error occurred */
else if (!ready)
return -EBUSY; /* init still in progress */
/* no need for a termination if PXP is not active */
if (pxp->status != XE_PXP_ACTIVE) {
drm_printf(&p, "PXP not active\n");
return 0;
}
/* simulate a termination interrupt */
spin_lock_irq(&pxp->xe->irq.lock);

@ -585,8 +585,7 @@ static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_obje
*/
static inline bool drm_gem_is_imported(const struct drm_gem_object *obj)
{
/* The dma-buf's priv field points to the original GEM object. */
return obj->dma_buf && (obj->dma_buf->priv != obj);
return !!obj->import_attach;
}
#ifdef CONFIG_LOCKDEP

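A usage sketch of the tightened predicate (the helper is hypothetical): since import_attach is only set on imported objects, callers may now rely on it being non-NULL whenever drm_gem_is_imported() is true:

	static struct device *gem_dma_dev(struct drm_gem_object *obj)
	{
		if (drm_gem_is_imported(obj))
			return obj->import_attach->dev;	/* importer's DMA device */
		return obj->dev->dev;
	}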
@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*/
#ifndef __UAPI_IVPU_DRM_H__
@ -147,7 +147,7 @@ struct drm_ivpu_param {
* platform type when executing on a simulator or emulator (read-only)
*
* %DRM_IVPU_PARAM_CORE_CLOCK_RATE:
* Current PLL frequency (read-only)
* Maximum frequency of the NPU data processing unit clock (read-only)
*
* %DRM_IVPU_PARAM_NUM_CONTEXTS:
* Maximum number of simultaneously existing contexts (read-only)