mmc: dwmmc: Add stride PIO for better burst mode support

This patch enables stride PIO for better burst mode
support on some Rockchip platforms, for instance rk3128.
It uses ldm and stm so that the CPU can access multiple
words at once on some ARM architectures.

Design Note:
1) Carefully test it on anything other than armv7(m) cores, since I
have not tested those! If you disassemble the code, dwmci_memcpy_fromio
may be changed to use ldmia.w and stmia.w, along with some
other optimizations of the push/pop stack operations, but they
are all fine!
2) Do *NOT* remove the noinline and __volatile__ attributes, as
you could run into trouble with GCC's optimizations.
And please find the equivalent attributes if you use *OTHER* compilers,
which have their own keywords for telling the compiler
"please don't inline the function and please don't reorder the
groups".
3) If you want to use this feature for other rockchip platforms,
you could append new config in rockchip_dw_mmc.c. But now we only
have CONFIG_ROCKCHIP_RK3128 which means we just enable it for
rk312x platforms.
4) Stride PIO is determined by both host->stride_pio and
the data payload. Its intention is to make block
access faster, so for now it is only enabled for transfers whose
data payload is larger than (or equal to) 512 Bytes.
5) MAX_STRIDE means we support burst MAX_STRIDE * 4 Bytes per
stride. Of course you could change that, but please read the
comment before it in the code and do it carefully.

How to test?
1) Prepare a memory, for instance, 0x70000000 on RK3126c.

=> mw 0x70000000 0x55aa55aa 0x200

2) Check it to see if 0x70000000 ~ 0x70000200 are all 0x55aa55aa

=> md 0x70000000 0x200
70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
....

3) Fetch the data at 0x70000000 ~ 0x70000200 and write it to eMMC,
for instance at LBA 0x10000, with a blk count of 1, as one blk means 0x200
bytes.

=> mmc write 0x70000000 0x10000 1

MMC write: dev # 0, block # 65536, count 1 ... 1 blocks written: OK

4) Clean the memory buffer and double check it

=> mw 0x70000000 0x0 0x200
=> md 0x70000000 0x200
70000000: 00000000 00000000 00000000 00000000    ................
70000010: 00000000 00000000 00000000 00000000    ................
70000020: 00000000 00000000 00000000 00000000    ................
....

5) Well, now let's read back the data from eMMC from the LBA
we wrote before.

=> mmc read 0x70000000 0x10000 1

MMC read: dev # 0, block # 65536, count 1 ... 1 blocks read: OK

6) We expect 0x70000000 ~ 0x70000200 to be 0x55aa55aa instead
of the 0x0 which was written by step 4).

=> md 0x70000000 0x200
70000000: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000010: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000020: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
70000030: 55aa55aa 55aa55aa 55aa55aa 55aa55aa    .U.U.U.U.U.U.U.U
....

Great, we see that the write and read eMMC are finished well and
the data payload is correct as expected!

Change-Id: I9b68c335449550b95f1a8f5841d46821346e45af
Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com>
This commit is contained in:
Shawn Lin 2017-10-09 17:51:29 +08:00 committed by Kever Yang
parent 3d363c8dd8
commit bda599f7c7
3 changed files with 87 additions and 6 deletions

View File

@ -16,6 +16,47 @@
#define PAGE_SIZE 4096
/*
* Currently it supports read/write up to 8*8*4 Bytes per
* stride as a burst mode. Please note that if you change
* MAX_STRIDE, you should also update dwmci_memcpy_fromio
* to augment the groups of {ldm, stm}.
*/
#define MAX_STRIDE 64
#if CONFIG_ARM && CONFIG_CPU_V7
/*
 * dwmci_memcpy_fromio() - copy one 256-byte stride from the FIFO to memory
 * @buffer:    destination memory; the asm advances it after every group
 * @fifo_addr: source address; every group loads the same 8 consecutive
 *             words starting here (no write-back on the load)
 *
 * Performs 8 groups of {ldm, stm}, each moving 8 words (8 * 8 * 4 Bytes
 * in total).
 *
 * The asm statement has no operand list and names r0/r1 directly, so it
 * relies on @buffer arriving in r0 and @fifo_addr in r1 (presumably per
 * the ARM procedure call standard -- confirm for your toolchain). That is
 * why the function is marked noinline.
 */
void noinline dwmci_memcpy_fromio(void *buffer, void *fifo_addr)
{
__asm__ __volatile__ (
/* Save the eight scratch registers used for the burst */
"push {r2, r3, r4, r5, r6, r7, r8, r9}\n"
/* Group 1: load 8 words from r1, store them at r0 and advance r0 */
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
/* Groups 2-8: identical to group 1 */
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"ldm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
"stm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
/* Restore the scratch registers saved above */
"pop {r2, r3, r4, r5, r6,r7,r8,r9}\n"
/* No operands; the "memory" clobber tells the compiler memory changed */
:::"memory"
);
}
/*
 * dwmci_memcpy_toio() - copy one 256-byte stride from memory to the FIFO
 * @buffer:    source memory; the asm advances it after every group
 * @fifo_addr: destination address; every group stores to the same 8
 *             consecutive words starting here (no write-back on the store)
 *
 * Performs 8 groups of {ldm, stm}, each moving 8 words (8 * 8 * 4 Bytes
 * in total).
 *
 * This cannot be implemented by calling dwmci_memcpy_fromio() with the
 * arguments swapped: that routine post-increments its first argument and
 * re-reads its second, so the same first 8 words of @buffer would be
 * written eight times while the FIFO address is walked. Here the
 * write-back is on the memory side (r0) and the FIFO address (r1) stays
 * fixed.
 *
 * The asm statement has no operand list and names r0/r1 directly, so it
 * relies on @buffer arriving in r0 and @fifo_addr in r1 (presumably per
 * the ARM procedure call standard -- confirm for your toolchain). Do not
 * drop noinline or __volatile__.
 */
void noinline dwmci_memcpy_toio(void *buffer, void *fifo_addr)
{
	__asm__ __volatile__ (
		/* Save the eight scratch registers used for the burst */
		"push {r2, r3, r4, r5, r6, r7, r8, r9}\n"
		/* 8 groups: load 8 words from r0 (advancing), store at r1 */
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"ldm r0!, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		"stm r1, {r2,r3,r4,r5,r6,r7,r8,r9}\n"
		/* Restore the scratch registers saved above */
		"pop {r2, r3, r4, r5, r6, r7, r8, r9}\n"
		:::"memory"
	);
}
#else
/*
 * Stub for builds that are not ARM + CPU_V7: no stride copy is implemented,
 * so this does nothing and leaves @buffer untouched.
 *
 * NOTE(review): dwmci_data_transfer() still advances its buffer pointer by
 * MAX_STRIDE after each call, so stride_pio must stay false on such builds.
 */
void dwmci_memcpy_fromio(void *buffer, void *fifo_addr)
{
}
/*
 * Stub for builds that are not ARM + CPU_V7: no stride copy is implemented,
 * so this does nothing and writes nothing to @fifo_addr.
 *
 * NOTE(review): dwmci_data_transfer() still advances its buffer pointer by
 * MAX_STRIDE after each call, so stride_pio must stay false on such builds.
 */
void dwmci_memcpy_toio(void *buffer, void *fifo_addr)
{
}
#endif
static int dwmci_wait_reset(struct dwmci_host *host, u32 value)
{
unsigned long timeout = 1000;
@ -102,8 +143,11 @@ static int dwmci_data_transfer(struct dwmci_host *host, struct mmc_data *data)
ulong start = get_timer(0);
u32 fifo_depth = (((host->fifoth_val & RX_WMARK_MASK) >>
RX_WMARK_SHIFT) + 1) * 2;
bool stride;
size = data->blocksize * data->blocks / 4;
/* Still use legacy PIO mode if size < 512(128 * 4) Bytes */
stride = host->stride_pio && size > 128;
if (data->flags == MMC_DATA_READ)
buf = (unsigned int *)data->dest;
else
@ -144,9 +188,24 @@ static int dwmci_data_transfer(struct dwmci_host *host, struct mmc_data *data)
len = (len >> DWMCI_FIFO_SHIFT) &
DWMCI_FIFO_MASK;
len = min(size, len);
for (i = 0; i < len; i++)
*buf++ =
dwmci_readl(host, DWMCI_DATA);
if (!stride) {
/* Legacy pio mode */
for (i = 0; i < len; i++)
*buf++ = dwmci_readl(host, DWMCI_DATA);
goto read_again;
}
/* dwmci_memcpy_fromio now bursts 256 Bytes once */
if (len < MAX_STRIDE)
continue;
for (i = 0; i < len / MAX_STRIDE; i++) {
dwmci_memcpy_fromio(buf, host->ioaddr + DWMCI_DATA);
buf += MAX_STRIDE;
}
len = i * MAX_STRIDE;
read_again:
size = size > len ? (size - len) : 0;
}
dwmci_writel(host, DWMCI_RINTSTS,
@ -159,9 +218,23 @@ static int dwmci_data_transfer(struct dwmci_host *host, struct mmc_data *data)
DWMCI_FIFO_SHIFT) &
DWMCI_FIFO_MASK);
len = min(size, len);
for (i = 0; i < len; i++)
dwmci_writel(host, DWMCI_DATA,
*buf++);
if (!stride) {
for (i = 0; i < len; i++)
dwmci_writel(host, DWMCI_DATA,
*buf++);
goto write_again;
}
/* dwmci_memcpy_toio now bursts 256 Bytes once */
if (len < MAX_STRIDE)
continue;
for (i = 0; i < len / MAX_STRIDE; i++) {
dwmci_memcpy_toio(buf, host->ioaddr + DWMCI_DATA);
buf += MAX_STRIDE;
}
len = i * MAX_STRIDE;
write_again:
size = size > len ? (size - len) : 0;
}
dwmci_writel(host, DWMCI_RINTSTS,

View File

@ -260,6 +260,12 @@ static int rockchip_dwmmc_probe(struct udevice *dev)
host->fifo_mode = priv->fifo_mode;
#ifdef CONFIG_ROCKCHIP_RK3128
host->stride_pio = true;
#else
host->stride_pio = false;
#endif
#ifdef CONFIG_PWRSEQ
/* Enable power if needed */
ret = uclass_get_device_by_phandle(UCLASS_PWRSEQ, dev, "mmc-pwrseq",

View File

@ -146,6 +146,7 @@
* @fifoth_val: Value for FIFOTH register (or 0 to leave unset)
* @mmc: Pointer to generic MMC structure for this device
* @priv: Private pointer for use by controller
* @stride_pio: Provide the ability of accessing fifo with burst mode
*/
struct dwmci_host {
const char *name;
@ -162,6 +163,7 @@ struct dwmci_host {
u32 fifoth_val;
struct mmc *mmc;
void *priv;
bool stride_pio;
void (*clksel)(struct dwmci_host *host);
void (*board_init)(struct dwmci_host *host);