Merge: [RHEL9.7] dmaengine: ae4dma driver support

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6721

# Merge Request Required Information

Upstream-Status: git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git
Upstream-Status: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

## Summary of Changes

Adds support for the AMD AE4DMA driver (drivers/dma/amd/ae4dma), a queue-based DMA engine intended for use with AMD Non-Transparent Bridge (NTB) devices.
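
For context when reviewing, below is a minimal, illustrative sketch (not part of this MR) of how a kernel client could exercise the DMA_MEMCPY capability the driver registers through the standard dmaengine API. The function name, buffer handling, and error paths are assumptions for illustration only, and DMA mapping-error checks are omitted for brevity.

```c
/* Illustrative dmaengine client sketch (not part of this MR). */
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>

static int ae4dma_memcpy_example(void *dst, void *src, size_t len)
{
	struct dma_async_tx_descriptor *tx;
	dma_addr_t dst_dma, src_dma;
	struct dma_chan *chan;
	dma_cap_mask_t mask;
	dma_cookie_t cookie;
	int ret = 0;

	/* Ask the dmaengine core for any channel advertising DMA_MEMCPY. */
	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);
	chan = dma_request_chan_by_mask(&mask);
	if (IS_ERR(chan))
		return PTR_ERR(chan);

	/* Map the buffers for the engine (mapping-error checks omitted). */
	src_dma = dma_map_single(chan->device->dev, src, len, DMA_TO_DEVICE);
	dst_dma = dma_map_single(chan->device->dev, dst, len, DMA_FROM_DEVICE);

	/* Build, submit, and kick one memcpy descriptor, then wait for it. */
	tx = dmaengine_prep_dma_memcpy(chan, dst_dma, src_dma, len, DMA_PREP_INTERRUPT);
	if (!tx) {
		ret = -EIO;
		goto unmap;
	}
	cookie = dmaengine_submit(tx);
	dma_async_issue_pending(chan);
	if (dma_sync_wait(chan, cookie) != DMA_COMPLETE)
		ret = -ETIMEDOUT;

unmap:
	dma_unmap_single(chan->device->dev, dst_dma, len, DMA_FROM_DEVICE);
	dma_unmap_single(chan->device->dev, src_dma, len, DMA_TO_DEVICE);
	dma_release_channel(chan);
	return ret;
}
```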

## Approved Development Ticket(s)

JIRA: https://issues.redhat.com/browse/RHEL-77318

Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>

v6: Rebased onto c9s/main, added the ntb commit that needed to be backported, and picked up a recent ptdma fix.

Approved-by: John W. Linville <linville@redhat.com>
Approved-by: Phil Auld <pauld@redhat.com>
Approved-by: Rafael Aquini <raquini@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>

Merged-by: Augusto Caringi <acaringi@redhat.com>
Commit 6f28e8aa2a by Augusto Caringi, 2025-07-15 15:53:43 -03:00
13 changed files with 797 additions and 44 deletions

View File

@ -922,6 +922,12 @@ L: linux-edac@vger.kernel.org
S: Supported
F: drivers/ras/amd/atl/*

AMD AE4DMA DRIVER
M: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
L: dmaengine@vger.kernel.org
S: Supported
F: drivers/dma/amd/ae4dma/

AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
M: John Allen <john.allen@amd.com>

View File

@ -1,5 +1,20 @@
# SPDX-License-Identifier: GPL-2.0-only
#
config AMD_AE4DMA
tristate "AMD AE4DMA Engine"
depends on (X86_64 || COMPILE_TEST) && PCI
depends on AMD_PTDMA
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
Enable support for the AMD AE4DMA controller. This controller
provides DMA capabilities to perform high bandwidth memory to
memory and IO copy operations. It performs DMA transfer through
queue-based descriptor management. This DMA controller is intended
to be used with AMD Non-Transparent Bridge devices and not for
general purpose peripheral DMA.
config AMD_PTDMA
tristate "AMD PassThru DMA Engine"
depends on X86_64 && PCI
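
A side note on the "queue-based descriptor management" mentioned in the help text above: each hardware queue is a ring addressed by a hardware read index and a driver-maintained write index, and the driver treats the ring as full when only one free slot remains. The following is an illustrative sketch of that arithmetic (helper names are assumptions, not part of the patch); it mirrors ae4_core_queue_full() further down in this MR, where the ring length constant is MAX_CMD_QLEN.

/* Illustrative only, not part of the patch. */
#include <linux/types.h>

/* Occupancy of a circular command queue given hardware read and driver write indices. */
static inline u32 ae4_ring_occupancy(u32 wr_idx, u32 rd_idx, u32 qlen)
{
	return (qlen + wr_idx - rd_idx) % qlen;
}

/* Full when only one free slot remains, matching ae4_core_queue_full(). */
static inline bool ae4_ring_full(u32 wr_idx, u32 rd_idx, u32 qlen)
{
	return ae4_ring_occupancy(wr_idx, rd_idx, qlen) >= qlen - 1;
}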

View File

@ -1,4 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_AMD_AE4DMA) += ae4dma/
obj-$(CONFIG_AMD_PTDMA) += ptdma/
obj-$(CONFIG_AMD_QDMA) += qdma/

View File

@ -0,0 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
#
# AMD AE4DMA driver
#
obj-$(CONFIG_AMD_AE4DMA) += ae4dma.o
ae4dma-objs := ae4dma-dev.o
ae4dma-$(CONFIG_PCI) += ae4dma-pci.o

View File

@ -0,0 +1,157 @@
// SPDX-License-Identifier: GPL-2.0
/*
* AMD AE4DMA driver
*
* Copyright (c) 2024, Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
*/
#include "ae4dma.h"
static unsigned int max_hw_q = 1;
module_param(max_hw_q, uint, 0444);
MODULE_PARM_DESC(max_hw_q, "max hw queues supported by engine (any non-zero value, default: 1)");
static void ae4_pending_work(struct work_struct *work)
{
struct ae4_cmd_queue *ae4cmd_q = container_of(work, struct ae4_cmd_queue, p_work.work);
struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
struct pt_cmd *cmd;
u32 cridx;
for (;;) {
wait_event_interruptible(ae4cmd_q->q_w,
((atomic64_read(&ae4cmd_q->done_cnt)) <
atomic64_read(&ae4cmd_q->intr_cnt)));
atomic64_inc(&ae4cmd_q->done_cnt);
mutex_lock(&ae4cmd_q->cmd_lock);
cridx = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
while ((ae4cmd_q->dridx != cridx) && !list_empty(&ae4cmd_q->cmd)) {
cmd = list_first_entry(&ae4cmd_q->cmd, struct pt_cmd, entry);
list_del(&cmd->entry);
ae4_check_status_error(ae4cmd_q, ae4cmd_q->dridx);
cmd->pt_cmd_callback(cmd->data, cmd->ret);
ae4cmd_q->q_cmd_count--;
ae4cmd_q->dridx = (ae4cmd_q->dridx + 1) % CMD_Q_LEN;
complete_all(&ae4cmd_q->cmp);
}
mutex_unlock(&ae4cmd_q->cmd_lock);
}
}
static irqreturn_t ae4_core_irq_handler(int irq, void *data)
{
struct ae4_cmd_queue *ae4cmd_q = data;
struct pt_cmd_queue *cmd_q;
struct pt_device *pt;
u32 status;
cmd_q = &ae4cmd_q->cmd_q;
pt = cmd_q->pt;
pt->total_interrupts++;
atomic64_inc(&ae4cmd_q->intr_cnt);
status = readl(cmd_q->reg_control + AE4_INTR_STS_OFF);
if (status & BIT(0)) {
status &= GENMASK(31, 1);
writel(status, cmd_q->reg_control + AE4_INTR_STS_OFF);
}
wake_up(&ae4cmd_q->q_w);
return IRQ_HANDLED;
}
void ae4_destroy_work(struct ae4_device *ae4)
{
struct ae4_cmd_queue *ae4cmd_q;
int i;
for (i = 0; i < ae4->cmd_q_count; i++) {
ae4cmd_q = &ae4->ae4cmd_q[i];
if (!ae4cmd_q->pws)
break;
cancel_delayed_work_sync(&ae4cmd_q->p_work);
destroy_workqueue(ae4cmd_q->pws);
}
}
int ae4_core_init(struct ae4_device *ae4)
{
struct pt_device *pt = &ae4->pt;
struct ae4_cmd_queue *ae4cmd_q;
struct device *dev = pt->dev;
struct pt_cmd_queue *cmd_q;
int i, ret = 0;
writel(max_hw_q, pt->io_regs);
for (i = 0; i < max_hw_q; i++) {
ae4cmd_q = &ae4->ae4cmd_q[i];
ae4cmd_q->id = ae4->cmd_q_count;
ae4->cmd_q_count++;
cmd_q = &ae4cmd_q->cmd_q;
cmd_q->pt = pt;
cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
ret = devm_request_irq(dev, ae4->ae4_irq[i], ae4_core_irq_handler, 0,
dev_name(pt->dev), ae4cmd_q);
if (ret)
return ret;
cmd_q->qsize = Q_SIZE(sizeof(struct ae4dma_desc));
cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, &cmd_q->qbase_dma,
GFP_KERNEL);
if (!cmd_q->qbase)
return -ENOMEM;
}
for (i = 0; i < ae4->cmd_q_count; i++) {
ae4cmd_q = &ae4->ae4cmd_q[i];
cmd_q = &ae4cmd_q->cmd_q;
cmd_q->reg_control = pt->io_regs + ((i + 1) * AE4_Q_SZ);
/* Update the device registers with queue information. */
writel(CMD_Q_LEN, cmd_q->reg_control + AE4_MAX_IDX_OFF);
cmd_q->qdma_tail = cmd_q->qbase_dma;
writel(lower_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_L_OFF);
writel(upper_32_bits(cmd_q->qdma_tail), cmd_q->reg_control + AE4_Q_BASE_H_OFF);
INIT_LIST_HEAD(&ae4cmd_q->cmd);
init_waitqueue_head(&ae4cmd_q->q_w);
ae4cmd_q->pws = alloc_ordered_workqueue("ae4dma_%d", WQ_MEM_RECLAIM, ae4cmd_q->id);
if (!ae4cmd_q->pws) {
ae4_destroy_work(ae4);
return -ENOMEM;
}
INIT_DELAYED_WORK(&ae4cmd_q->p_work, ae4_pending_work);
queue_delayed_work(ae4cmd_q->pws, &ae4cmd_q->p_work, usecs_to_jiffies(100));
init_completion(&ae4cmd_q->cmp);
}
ret = pt_dmaengine_register(pt);
if (ret)
ae4_destroy_work(ae4);
else
ptdma_debugfs_setup(pt);
return ret;
}

View File

@ -0,0 +1,156 @@
// SPDX-License-Identifier: GPL-2.0
/*
* AMD AE4DMA driver
*
* Copyright (c) 2024, Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
*/
#include "ae4dma.h"
static int ae4_get_irqs(struct ae4_device *ae4)
{
struct ae4_msix *ae4_msix = ae4->ae4_msix;
struct pt_device *pt = &ae4->pt;
struct device *dev = pt->dev;
struct pci_dev *pdev;
int i, v, ret;
pdev = to_pci_dev(dev);
for (v = 0; v < ARRAY_SIZE(ae4_msix->msix_entry); v++)
ae4_msix->msix_entry[v].entry = v;
ret = pci_alloc_irq_vectors(pdev, v, v, PCI_IRQ_MSIX);
if (ret != v) {
if (ret > 0)
pci_free_irq_vectors(pdev);
dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
if (ret < 0) {
dev_err(dev, "could not enable MSI (%d)\n", ret);
return ret;
}
ret = pci_irq_vector(pdev, 0);
if (ret < 0) {
pci_free_irq_vectors(pdev);
return ret;
}
for (i = 0; i < MAX_AE4_HW_QUEUES; i++)
ae4->ae4_irq[i] = ret;
} else {
ae4_msix->msix_count = ret;
for (i = 0; i < ae4_msix->msix_count; i++)
ae4->ae4_irq[i] = pci_irq_vector(pdev, i);
}
return ret;
}
static void ae4_free_irqs(struct ae4_device *ae4)
{
struct ae4_msix *ae4_msix = ae4->ae4_msix;
struct pt_device *pt = &ae4->pt;
struct device *dev = pt->dev;
struct pci_dev *pdev;
pdev = to_pci_dev(dev);
if (ae4_msix && (ae4_msix->msix_count || ae4->ae4_irq[MAX_AE4_HW_QUEUES - 1]))
pci_free_irq_vectors(pdev);
}
static void ae4_deinit(struct ae4_device *ae4)
{
ae4_free_irqs(ae4);
}
static int ae4_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct ae4_device *ae4;
struct pt_device *pt;
int bar_mask;
int ret = 0;
ae4 = devm_kzalloc(dev, sizeof(*ae4), GFP_KERNEL);
if (!ae4)
return -ENOMEM;
ae4->ae4_msix = devm_kzalloc(dev, sizeof(struct ae4_msix), GFP_KERNEL);
if (!ae4->ae4_msix)
return -ENOMEM;
ret = pcim_enable_device(pdev);
if (ret)
goto ae4_error;
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
ret = pcim_iomap_regions(pdev, bar_mask, "ae4dma");
if (ret)
goto ae4_error;
pt = &ae4->pt;
pt->dev = dev;
pt->ver = AE4_DMA_VERSION;
pt->io_regs = pcim_iomap_table(pdev)[0];
if (!pt->io_regs) {
ret = -ENOMEM;
goto ae4_error;
}
ret = ae4_get_irqs(ae4);
if (ret < 0)
goto ae4_error;
pci_set_master(pdev);
dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
dev_set_drvdata(dev, ae4);
ret = ae4_core_init(ae4);
if (ret)
goto ae4_error;
return 0;
ae4_error:
ae4_deinit(ae4);
return ret;
}
static void ae4_pci_remove(struct pci_dev *pdev)
{
struct ae4_device *ae4 = dev_get_drvdata(&pdev->dev);
ae4_destroy_work(ae4);
ae4_deinit(ae4);
}
static const struct pci_device_id ae4_pci_table[] = {
{ PCI_VDEVICE(AMD, 0x149B), },
/* Last entry must be zero */
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ae4_pci_table);
static struct pci_driver ae4_pci_driver = {
.name = "ae4dma",
.id_table = ae4_pci_table,
.probe = ae4_pci_probe,
.remove = ae4_pci_remove,
};
module_pci_driver(ae4_pci_driver);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("AMD AE4DMA driver");

View File

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* AMD AE4DMA driver
*
* Copyright (c) 2024, Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Author: Basavaraj Natikar <Basavaraj.Natikar@amd.com>
*/
#ifndef __AE4DMA_H__
#define __AE4DMA_H__
#include <linux/device.h>
#include <linux/dmaengine.h>
#include <linux/dmapool.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include "../ptdma/ptdma.h"
#include "../../virt-dma.h"
#define MAX_AE4_HW_QUEUES 16
#define AE4_DESC_COMPLETED 0x03
#define AE4_MAX_IDX_OFF 0x08
#define AE4_RD_IDX_OFF 0x0c
#define AE4_WR_IDX_OFF 0x10
#define AE4_INTR_STS_OFF 0x14
#define AE4_Q_BASE_L_OFF 0x18
#define AE4_Q_BASE_H_OFF 0x1c
#define AE4_Q_SZ 0x20
#define AE4_DMA_VERSION 4
#define CMD_AE4_DESC_DW0_VAL 2
#define AE4_TIME_OUT 5000
struct ae4_msix {
int msix_count;
struct msix_entry msix_entry[MAX_AE4_HW_QUEUES];
};
struct ae4_cmd_queue {
struct ae4_device *ae4;
struct pt_cmd_queue cmd_q;
struct list_head cmd;
/* protect command operations */
struct mutex cmd_lock;
struct delayed_work p_work;
struct workqueue_struct *pws;
struct completion cmp;
wait_queue_head_t q_w;
atomic64_t intr_cnt;
atomic64_t done_cnt;
u64 q_cmd_count;
u32 dridx;
u32 tail_wi;
u32 id;
};
union dwou {
u32 dw0;
struct dword0 {
u8 byte0;
u8 byte1;
u16 timestamp;
} dws;
};
struct dword1 {
u8 status;
u8 err_code;
u16 desc_id;
};
struct ae4dma_desc {
union dwou dwouv;
struct dword1 dw1;
u32 length;
u32 rsvd;
u32 src_hi;
u32 src_lo;
u32 dst_hi;
u32 dst_lo;
};
struct ae4_device {
struct pt_device pt;
struct ae4_msix *ae4_msix;
struct ae4_cmd_queue ae4cmd_q[MAX_AE4_HW_QUEUES];
unsigned int ae4_irq[MAX_AE4_HW_QUEUES];
unsigned int cmd_q_count;
};
int ae4_core_init(struct ae4_device *ae4);
void ae4_destroy_work(struct ae4_device *ae4);
void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx);
#endif

View File

@ -13,6 +13,7 @@
#include <linux/seq_file.h>
#include "ptdma.h"
#include "../ae4dma/ae4dma.h"
/* DebugFS helpers */
#define RI_VERSION_NUM 0x0000003F
@ -23,11 +24,19 @@
static int pt_debugfs_info_show(struct seq_file *s, void *p)
{
struct pt_device *pt = s->private;
struct ae4_device *ae4;
unsigned int regval;
seq_printf(s, "Device name: %s\n", dev_name(pt->dev));
seq_printf(s, " # Queues: %d\n", 1);
seq_printf(s, " # Cmds: %d\n", pt->cmd_count);
if (pt->ver == AE4_DMA_VERSION) {
ae4 = container_of(pt, struct ae4_device, pt);
seq_printf(s, " # Queues: %d\n", ae4->cmd_q_count);
seq_printf(s, " # Cmds per queue: %d\n", CMD_Q_LEN);
} else {
seq_printf(s, " # Queues: %d\n", 1);
seq_printf(s, " # Cmds: %d\n", pt->cmd_count);
}
regval = ioread32(pt->io_regs + CMD_PT_VERSION);
@ -55,6 +64,7 @@ static int pt_debugfs_stats_show(struct seq_file *s, void *p)
static int pt_debugfs_queue_show(struct seq_file *s, void *p)
{
struct pt_cmd_queue *cmd_q = s->private;
struct pt_device *pt;
unsigned int regval;
if (!cmd_q)
@ -62,18 +72,24 @@ static int pt_debugfs_queue_show(struct seq_file *s, void *p)
seq_printf(s, " Pass-Thru: %ld\n", cmd_q->total_pt_ops);
regval = ioread32(cmd_q->reg_control + 0x000C);
pt = cmd_q->pt;
if (pt->ver == AE4_DMA_VERSION) {
regval = readl(cmd_q->reg_control + 0x4);
seq_printf(s, " Enabled Interrupts:: status 0x%x\n", regval);
} else {
regval = ioread32(cmd_q->reg_control + 0x000C);
seq_puts(s, " Enabled Interrupts:");
if (regval & INT_EMPTY_QUEUE)
seq_puts(s, " EMPTY");
if (regval & INT_QUEUE_STOPPED)
seq_puts(s, " STOPPED");
if (regval & INT_ERROR)
seq_puts(s, " ERROR");
if (regval & INT_COMPLETION)
seq_puts(s, " COMPLETION");
seq_puts(s, "\n");
seq_puts(s, " Enabled Interrupts:");
if (regval & INT_EMPTY_QUEUE)
seq_puts(s, " EMPTY");
if (regval & INT_QUEUE_STOPPED)
seq_puts(s, " STOPPED");
if (regval & INT_ERROR)
seq_puts(s, " ERROR");
if (regval & INT_COMPLETION)
seq_puts(s, " COMPLETION");
seq_puts(s, "\n");
}
return 0;
}
@ -84,8 +100,12 @@ DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats);
void ptdma_debugfs_setup(struct pt_device *pt)
{
struct pt_cmd_queue *cmd_q;
struct dentry *debugfs_q_instance;
struct ae4_cmd_queue *ae4cmd_q;
struct pt_cmd_queue *cmd_q;
struct ae4_device *ae4;
char name[30];
int i;
if (!debugfs_initialized())
return;
@ -96,11 +116,28 @@ void ptdma_debugfs_setup(struct pt_device *pt)
debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt,
&pt_debugfs_stats_fops);
cmd_q = &pt->cmd_q;
debugfs_q_instance =
debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
if (pt->ver == AE4_DMA_VERSION) {
ae4 = container_of(pt, struct ae4_device, pt);
for (i = 0; i < ae4->cmd_q_count; i++) {
ae4cmd_q = &ae4->ae4cmd_q[i];
cmd_q = &ae4cmd_q->cmd_q;
debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
&pt_debugfs_queue_fops);
memset(name, 0, sizeof(name));
snprintf(name, 29, "q%d", ae4cmd_q->id);
debugfs_q_instance =
debugfs_create_dir(name, pt->dma_dev.dbg_dev_root);
debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
&pt_debugfs_queue_fops);
}
} else {
debugfs_q_instance =
debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
cmd_q = &pt->cmd_q;
debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
&pt_debugfs_queue_fops);
}
}
EXPORT_SYMBOL_GPL(ptdma_debugfs_setup);

View File

@ -9,9 +9,59 @@
* Author: Gary R Hook <gary.hook@amd.com>
*/
#include <linux/bitfield.h>
#include "ptdma.h"
#include "../ae4dma/ae4dma.h"
#include "../../dmaengine.h"
static char *ae4_error_codes[] = {
"",
"ERR 01: INVALID HEADER DW0",
"ERR 02: INVALID STATUS",
"ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
"ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
"ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
"ERR 06: INVALID ALIGNMENT",
"ERR 07: INVALID DESCRIPTOR",
};
static void ae4_log_error(struct pt_device *d, int e)
{
/* ERR 01 - 07 represents Invalid AE4 errors */
if (e <= 7)
dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
/* ERR 08 - 15 represents Invalid Descriptor errors */
else if (e > 7 && e <= 15)
dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
/* ERR 16 - 31 represents Firmware errors */
else if (e > 15 && e <= 31)
dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e);
/* ERR 32 - 63 represents Fatal errors */
else if (e > 31 && e <= 63)
dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e);
/* ERR 64 - 255 represents PTE errors */
else if (e > 63 && e <= 255)
dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
else
dev_info(d->dev, "Unknown AE4DMA error");
}
void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
{
struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
struct ae4dma_desc desc;
u8 status;
memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
status = desc.dw1.status;
if (status && status != AE4_DESC_COMPLETED) {
cmd_q->cmd_error = desc.dw1.err_code;
if (cmd_q->cmd_error)
ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
}
}
EXPORT_SYMBOL_GPL(ae4_check_status_error);
static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
{
return container_of(dma_chan, struct pt_dma_chan, vc.chan);
@ -44,7 +94,71 @@ static void pt_do_cleanup(struct virt_dma_desc *vd)
kmem_cache_free(pt->dma_desc_cache, desc);
}
static int pt_dma_start_desc(struct pt_dma_desc *desc)
static struct pt_cmd_queue *pt_get_cmd_queue(struct pt_device *pt, struct pt_dma_chan *chan)
{
struct ae4_cmd_queue *ae4cmd_q;
struct pt_cmd_queue *cmd_q;
struct ae4_device *ae4;
if (pt->ver == AE4_DMA_VERSION) {
ae4 = container_of(pt, struct ae4_device, pt);
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
cmd_q = &ae4cmd_q->cmd_q;
} else {
cmd_q = &pt->cmd_q;
}
return cmd_q;
}
static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q)
{
bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
if (soc) {
desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0);
desc->dwouv.dw0 &= ~DWORD0_SOC;
}
mutex_lock(&ae4cmd_q->cmd_lock);
memcpy(&cmd_q->qbase[ae4cmd_q->tail_wi], desc, sizeof(struct ae4dma_desc));
ae4cmd_q->q_cmd_count++;
ae4cmd_q->tail_wi = (ae4cmd_q->tail_wi + 1) % CMD_Q_LEN;
writel(ae4cmd_q->tail_wi, cmd_q->reg_control + AE4_WR_IDX_OFF);
mutex_unlock(&ae4cmd_q->cmd_lock);
wake_up(&ae4cmd_q->q_w);
return 0;
}
static int pt_core_perform_passthru_ae4(struct pt_cmd_queue *cmd_q,
struct pt_passthru_engine *pt_engine)
{
struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
struct ae4dma_desc desc;
cmd_q->cmd_error = 0;
cmd_q->total_pt_ops++;
memset(&desc, 0, sizeof(desc));
desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;
desc.dw1.status = 0;
desc.dw1.err_code = 0;
desc.dw1.desc_id = 0;
desc.length = pt_engine->src_len;
desc.src_lo = upper_32_bits(pt_engine->src_dma);
desc.src_hi = lower_32_bits(pt_engine->src_dma);
desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
desc.dst_hi = lower_32_bits(pt_engine->dst_dma);
return ae4_core_execute_cmd(&desc, ae4cmd_q);
}
static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan)
{
struct pt_passthru_engine *pt_engine;
struct pt_device *pt;
@ -55,13 +169,18 @@ static int pt_dma_start_desc(struct pt_dma_desc *desc)
pt_cmd = &desc->pt_cmd;
pt = pt_cmd->pt;
cmd_q = &pt->cmd_q;
cmd_q = pt_get_cmd_queue(pt, chan);
pt_engine = &pt_cmd->passthru;
pt->tdata.cmd = pt_cmd;
/* Execute the command */
pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
if (pt->ver == AE4_DMA_VERSION)
pt_cmd->ret = pt_core_perform_passthru_ae4(cmd_q, pt_engine);
else
pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
return 0;
}
@ -79,8 +198,10 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
{
struct dma_async_tx_descriptor *tx_desc;
struct virt_dma_desc *vd;
struct pt_device *pt;
unsigned long flags;
pt = chan->pt;
/* Loop over descriptors until one is found with commands */
do {
if (desc) {
@ -98,7 +219,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
spin_lock_irqsave(&chan->vc.lock, flags);
if (desc) {
if (pt->ver != AE4_DMA_VERSION && desc) {
if (desc->status != DMA_COMPLETE) {
if (desc->status != DMA_ERROR)
desc->status = DMA_COMPLETE;
@ -116,7 +237,7 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
spin_unlock_irqrestore(&chan->vc.lock, flags);
if (tx_desc) {
if (pt->ver != AE4_DMA_VERSION && tx_desc) {
dmaengine_desc_get_callback_invoke(tx_desc, NULL);
dma_run_dependencies(tx_desc);
vchan_vdesc_fini(vd);
@ -126,11 +247,25 @@ static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
return NULL;
}
static inline bool ae4_core_queue_full(struct pt_cmd_queue *cmd_q)
{
u32 front_wi = readl(cmd_q->reg_control + AE4_WR_IDX_OFF);
u32 rear_ri = readl(cmd_q->reg_control + AE4_RD_IDX_OFF);
if (((MAX_CMD_QLEN + front_wi - rear_ri) % MAX_CMD_QLEN) >= (MAX_CMD_QLEN - 1))
return true;
return false;
}
static void pt_cmd_callback(void *data, int err)
{
struct pt_dma_desc *desc = data;
struct ae4_cmd_queue *ae4cmd_q;
struct dma_chan *dma_chan;
struct pt_dma_chan *chan;
struct ae4_device *ae4;
struct pt_device *pt;
int ret;
if (err == -EINPROGRESS)
@ -138,11 +273,32 @@ static void pt_cmd_callback(void *data, int err)
dma_chan = desc->vd.tx.chan;
chan = to_pt_chan(dma_chan);
pt = chan->pt;
if (err)
desc->status = DMA_ERROR;
while (true) {
if (pt->ver == AE4_DMA_VERSION) {
ae4 = container_of(pt, struct ae4_device, pt);
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
if (ae4cmd_q->q_cmd_count >= (CMD_Q_LEN - 1) ||
ae4_core_queue_full(&ae4cmd_q->cmd_q)) {
wake_up(&ae4cmd_q->q_w);
if (wait_for_completion_timeout(&ae4cmd_q->cmp,
msecs_to_jiffies(AE4_TIME_OUT))
== 0) {
dev_err(pt->dev, "TIMEOUT %d:\n", ae4cmd_q->id);
break;
}
reinit_completion(&ae4cmd_q->cmp);
continue;
}
}
/* Check for DMA descriptor completion */
desc = pt_handle_active_desc(chan, desc);
@ -150,7 +306,7 @@ static void pt_cmd_callback(void *data, int err)
if (!desc)
break;
ret = pt_dma_start_desc(desc);
ret = pt_dma_start_desc(desc, chan);
if (!ret)
break;
@ -177,6 +333,50 @@ static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
return desc;
}
static void pt_cmd_callback_work(void *data, int err)
{
struct dma_async_tx_descriptor *tx_desc;
struct pt_dma_desc *desc = data;
struct dma_chan *dma_chan;
struct virt_dma_desc *vd;
struct pt_dma_chan *chan;
unsigned long flags;
if (!desc)
return;
dma_chan = desc->vd.tx.chan;
chan = to_pt_chan(dma_chan);
if (err == -EINPROGRESS)
return;
tx_desc = &desc->vd.tx;
vd = &desc->vd;
if (err)
desc->status = DMA_ERROR;
spin_lock_irqsave(&chan->vc.lock, flags);
if (desc->status != DMA_COMPLETE) {
if (desc->status != DMA_ERROR)
desc->status = DMA_COMPLETE;
dma_cookie_complete(tx_desc);
dma_descriptor_unmap(tx_desc);
} else {
tx_desc = NULL;
}
spin_unlock_irqrestore(&chan->vc.lock, flags);
if (tx_desc) {
dmaengine_desc_get_callback_invoke(tx_desc, NULL);
dma_run_dependencies(tx_desc);
list_del(&desc->vd.node);
vchan_vdesc_fini(vd);
}
}
static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
dma_addr_t dst,
dma_addr_t src,
@ -185,7 +385,10 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
{
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
struct pt_passthru_engine *pt_engine;
struct pt_device *pt = chan->pt;
struct ae4_cmd_queue *ae4cmd_q;
struct pt_dma_desc *desc;
struct ae4_device *ae4;
struct pt_cmd *pt_cmd;
desc = pt_alloc_dma_desc(chan, flags);
@ -193,7 +396,7 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
return NULL;
pt_cmd = &desc->pt_cmd;
pt_cmd->pt = chan->pt;
pt_cmd->pt = pt;
pt_engine = &pt_cmd->passthru;
pt_cmd->engine = PT_ENGINE_PASSTHRU;
pt_engine->src_dma = src;
@ -204,6 +407,15 @@ static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
desc->len = len;
if (pt->ver == AE4_DMA_VERSION) {
pt_cmd->pt_cmd_callback = pt_cmd_callback_work;
ae4 = container_of(pt, struct ae4_device, pt);
ae4cmd_q = &ae4->ae4cmd_q[chan->id];
mutex_lock(&ae4cmd_q->cmd_lock);
list_add_tail(&pt_cmd->entry, &ae4cmd_q->cmd);
mutex_unlock(&ae4cmd_q->cmd_lock);
}
return desc;
}
@ -237,13 +449,16 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
{
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
struct pt_dma_desc *desc;
struct pt_device *pt;
unsigned long flags;
bool engine_is_idle = true;
pt = chan->pt;
spin_lock_irqsave(&chan->vc.lock, flags);
desc = pt_next_dma_desc(chan);
if (desc)
if (desc && pt->ver != AE4_DMA_VERSION)
engine_is_idle = false;
vchan_issue_pending(&chan->vc);
@ -257,24 +472,43 @@ static void pt_issue_pending(struct dma_chan *dma_chan)
pt_cmd_callback(desc, 0);
}
static void pt_check_status_trans_ae4(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
{
struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
int i;
for (i = 0; i < CMD_Q_LEN; i++)
ae4_check_status_error(ae4cmd_q, i);
}
static enum dma_status
pt_tx_status(struct dma_chan *c, dma_cookie_t cookie,
struct dma_tx_state *txstate)
{
struct pt_device *pt = to_pt_chan(c)->pt;
struct pt_cmd_queue *cmd_q = &pt->cmd_q;
struct pt_dma_chan *chan = to_pt_chan(c);
struct pt_device *pt = chan->pt;
struct pt_cmd_queue *cmd_q;
cmd_q = pt_get_cmd_queue(pt, chan);
if (pt->ver == AE4_DMA_VERSION)
pt_check_status_trans_ae4(pt, cmd_q);
else
pt_check_status_trans(pt, cmd_q);
pt_check_status_trans(pt, cmd_q);
return dma_cookie_status(c, cookie, txstate);
}
static int pt_pause(struct dma_chan *dma_chan)
{
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
struct pt_device *pt = chan->pt;
struct pt_cmd_queue *cmd_q;
unsigned long flags;
spin_lock_irqsave(&chan->vc.lock, flags);
pt_stop_queue(&chan->pt->cmd_q);
cmd_q = pt_get_cmd_queue(pt, chan);
pt_stop_queue(cmd_q);
spin_unlock_irqrestore(&chan->vc.lock, flags);
return 0;
@ -284,10 +518,13 @@ static int pt_resume(struct dma_chan *dma_chan)
{
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
struct pt_dma_desc *desc = NULL;
struct pt_device *pt = chan->pt;
struct pt_cmd_queue *cmd_q;
unsigned long flags;
spin_lock_irqsave(&chan->vc.lock, flags);
pt_start_queue(&chan->pt->cmd_q);
cmd_q = pt_get_cmd_queue(pt, chan);
pt_start_queue(cmd_q);
desc = pt_next_dma_desc(chan);
spin_unlock_irqrestore(&chan->vc.lock, flags);
@ -301,11 +538,17 @@ static int pt_resume(struct dma_chan *dma_chan)
static int pt_terminate_all(struct dma_chan *dma_chan)
{
struct pt_dma_chan *chan = to_pt_chan(dma_chan);
struct pt_device *pt = chan->pt;
struct pt_cmd_queue *cmd_q;
unsigned long flags;
struct pt_cmd_queue *cmd_q = &chan->pt->cmd_q;
LIST_HEAD(head);
iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
cmd_q = pt_get_cmd_queue(pt, chan);
if (pt->ver == AE4_DMA_VERSION)
pt_stop_queue(cmd_q);
else
iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
spin_lock_irqsave(&chan->vc.lock, flags);
vchan_get_all_descriptors(&chan->vc, &head);
spin_unlock_irqrestore(&chan->vc.lock, flags);
@ -318,14 +561,24 @@ static int pt_terminate_all(struct dma_chan *dma_chan)
int pt_dmaengine_register(struct pt_device *pt)
{
struct pt_dma_chan *chan;
struct dma_device *dma_dev = &pt->dma_dev;
char *cmd_cache_name;
struct ae4_cmd_queue *ae4cmd_q = NULL;
struct ae4_device *ae4 = NULL;
struct pt_dma_chan *chan;
char *desc_cache_name;
int ret;
char *cmd_cache_name;
int ret, i;
if (pt->ver == AE4_DMA_VERSION)
ae4 = container_of(pt, struct ae4_device, pt);
if (ae4)
pt->pt_dma_chan = devm_kcalloc(pt->dev, ae4->cmd_q_count,
sizeof(*pt->pt_dma_chan), GFP_KERNEL);
else
pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
GFP_KERNEL);
pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
GFP_KERNEL);
if (!pt->pt_dma_chan)
return -ENOMEM;
@ -367,9 +620,6 @@ int pt_dmaengine_register(struct pt_device *pt)
INIT_LIST_HEAD(&dma_dev->channels);
chan = pt->pt_dma_chan;
chan->pt = pt;
/* Set base and prep routines */
dma_dev->device_free_chan_resources = pt_free_chan_resources;
dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
@ -381,8 +631,21 @@ int pt_dmaengine_register(struct pt_device *pt)
dma_dev->device_terminate_all = pt_terminate_all;
dma_dev->device_synchronize = pt_synchronize;
chan->vc.desc_free = pt_do_cleanup;
vchan_init(&chan->vc, dma_dev);
if (ae4) {
for (i = 0; i < ae4->cmd_q_count; i++) {
chan = pt->pt_dma_chan + i;
ae4cmd_q = &ae4->ae4cmd_q[i];
chan->id = ae4cmd_q->id;
chan->pt = pt;
chan->vc.desc_free = pt_do_cleanup;
vchan_init(&chan->vc, dma_dev);
}
} else {
chan = pt->pt_dma_chan;
chan->pt = pt;
chan->vc.desc_free = pt_do_cleanup;
vchan_init(&chan->vc, dma_dev);
}
ret = dma_async_device_register(dma_dev);
if (ret)
@ -398,6 +661,7 @@ err_cache:
return ret;
}
EXPORT_SYMBOL_GPL(pt_dmaengine_register);
void pt_dmaengine_unregister(struct pt_device *pt)
{

View File

@ -184,6 +184,7 @@ struct pt_dma_desc {
struct pt_dma_chan {
struct virt_dma_chan vc;
struct pt_device *pt;
u32 id;
};
struct pt_cmd_queue {
@ -262,6 +263,7 @@ struct pt_device {
unsigned long total_interrupts;
struct pt_tasklet_data tdata;
int ver;
};
/*

View File

@ -1323,6 +1323,7 @@ static const struct pci_device_id amd_ntb_pci_tbl[] = {
{ PCI_VDEVICE(AMD, 0x148b), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(AMD, 0x14c0), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(AMD, 0x14c3), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(AMD, 0x155a), (kernel_ulong_t)&dev_data[1] },
{ PCI_VDEVICE(HYGON, 0x145b), (kernel_ulong_t)&dev_data[0] },
{ 0, }
};

View File

@ -0,0 +1 @@
# CONFIG_AMD_AE4DMA is not set

View File

@ -0,0 +1 @@
CONFIG_AMD_AE4DMA=m