The cache causes a fixed delay regardless of stream parameters. Consequently, all that "cache invalidate size" calculation stuff was garbage (which can be traced right back to Creative's OSS driver). This also removes the definitions of registers CD1..CDF, because they are accessed only relative to CD0 anyway. Signed-off-by: Oswald Buddenhagen <oswald.buddenhagen@xxxxxx> --- include/sound/emu10k1.h | 38 +++++++++++---------- sound/pci/emu10k1/emupcm.c | 67 +++++++++++++------------------------- 2 files changed, 43 insertions(+), 62 deletions(-) diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 2d64f07e3883..ee662a1b0dc7 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -116,6 +116,10 @@ #define IPR_MIDITRANSBUFEMPTY 0x00000100 /* MIDI UART transmit buffer empty */ #define IPR_MIDIRECVBUFEMPTY 0x00000080 /* MIDI UART receive buffer empty */ #define IPR_CHANNELLOOP 0x00000040 /* Channel (half) loop interrupt(s) pending */ + /* The interrupt is triggered shortly after */ + /* CCR_READADDRESS has crossed the boundary; */ + /* due to the cache, this runs ahead of the */ + /* actual playback position. */ #define IPR_CHANNELNUMBERMASK 0x0000003f /* When IPR_CHANNELLOOP is set, indicates the */ /* highest set channel in CLIPL, CLIPH, HLIPL, */ /* or HLIPH. When IPR is written with CL set, */ @@ -586,24 +590,22 @@ SUB_REG(PEFE, FILTERAMOUNT, 0x000000ff) /* Filter envlope amount */ /* 0x1f: not used */ -#define CD0 0x20 /* Cache data 0 register */ -#define CD1 0x21 /* Cache data 1 register */ -#define CD2 0x22 /* Cache data 2 register */ -#define CD3 0x23 /* Cache data 3 register */ -#define CD4 0x24 /* Cache data 4 register */ -#define CD5 0x25 /* Cache data 5 register */ -#define CD6 0x26 /* Cache data 6 register */ -#define CD7 0x27 /* Cache data 7 register */ -#define CD8 0x28 /* Cache data 8 register */ -#define CD9 0x29 /* Cache data 9 register */ -#define CDA 0x2a /* Cache data A register */ -#define CDB 0x2b /* Cache data B register */ -#define CDC 0x2c /* Cache data C register */ -#define CDD 0x2d /* Cache data D register */ -#define CDE 0x2e /* Cache data E register */ -#define CDF 0x2f /* Cache data F register */ - -/* 0x30-3f seem to be the same as 0x20-2f */ +// 32 cache registers (== 128 bytes) per channel follow. +// In stereo mode, the two channels' caches are concatenated into one, +// and hold the interleaved frames. +// The cache holds 64 frames, so the upper half is not used in 8-bit mode. +// All registers mentioned below count in frames. +// The cache is a ring buffer; CCR_READADDRESS operates modulo 64. +// The cache is filled from (CCCA_CURRADDR - CCR_CACHEINVALIDSIZE) +// into (CCR_READADDRESS - CCR_CACHEINVALIDSIZE). +// The engine has a fetch threshold of 32 bytes, so it tries to keep +// CCR_CACHEINVALIDSIZE below 8 (16-bit stereo), 16 (16-bit mono, +// 8-bit stereo), or 32 (8-bit mono). The actual transfers are pretty +// unpredictable, especially if several voices are running. +// Frames are consumed at CCR_READADDRESS, which is incremented afterwards, +// along with CCCA_CURRADDR and CCR_CACHEINVALIDSIZE. This implies that the +// actual playback position always lags CCCA_CURRADDR by exactly 64 frames. +#define CD0 0x20 /* Cache data registers 0 .. 0x1f */ #define PTB 0x40 /* Page table base register */ #define PTB_MASK 0xfffff000 /* Physical address of the page table in host memory */ diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index a6c4f1895a08..feb575922738 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -112,6 +112,10 @@ static int snd_emu10k1_pcm_channel_alloc(struct snd_emu10k1_pcm * epcm, int voic } } if (epcm->extra == NULL) { + // The hardware supports only (half-)loop interrupts, so to support an + // arbitrary number of periods per buffer, we use an extra voice with a + // period-sized loop as the interrupt source. Additionally, the interrupt + // timing of the hardware is "suboptimal" and needs some compensation. err = snd_emu10k1_voice_alloc(epcm->emu, epcm->type == PLAYBACK_EMUVOICE ? EMU10K1_PCM : EMU10K1_EFX, 1, @@ -232,23 +236,6 @@ static unsigned int emu10k1_select_interprom(unsigned int pitch_target) return CCCA_INTERPROM_2; } -/* - * calculate cache invalidate size - * - * stereo: channel is stereo - * w_16: using 16bit samples - * - * returns: cache invalidate size in samples - */ -static inline int emu10k1_ccis(int stereo, int w_16) -{ - if (w_16) { - return stereo ? 24 : 26; - } else { - return stereo ? 24*2 : 26*2; - } -} - static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, int master, int extra, struct snd_emu10k1_voice *evoice, @@ -264,7 +251,6 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, unsigned char send_routing[8]; unsigned long flags; unsigned int pitch_target; - unsigned int ccis; voice = evoice->number; stereo = runtime->channels == 2; @@ -284,10 +270,8 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, memcpy(send_amount, &mix->send_volume[tmp][0], 8); } - ccis = emu10k1_ccis(stereo, w_16); - if (master) { - evoice->epcm->ccca_start_addr = start_addr + ccis; + evoice->epcm->ccca_start_addr = start_addr + 64 - 3; } if (stereo && !extra) { // Not really necessary for the slave, but it doesn't hurt @@ -317,11 +301,11 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, else pitch_target = emu10k1_calc_pitch_target(runtime->rate); if (extra) - snd_emu10k1_ptr_write(emu, CCCA, voice, start_addr | + snd_emu10k1_ptr_write(emu, CCCA, voice, (end_addr - 3) | emu10k1_select_interprom(pitch_target) | (w_16 ? 0 : CCCA_8BITSELECT)); else - snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + ccis) | + snd_emu10k1_ptr_write(emu, CCCA, voice, (start_addr + 64 - 3) | emu10k1_select_interprom(pitch_target) | (w_16 ? 0 : CCCA_8BITSELECT)); /* Clear filter delay memory */ @@ -532,35 +516,30 @@ static void snd_emu10k1_playback_invalidate_cache(struct snd_emu10k1 *emu, struct snd_emu10k1_voice *evoice) { struct snd_pcm_runtime *runtime; - unsigned int voice, stereo, i, ccis, cra = 64, cs, sample; + unsigned voice, stereo, sample; + u32 ccr; runtime = evoice->epcm->substream->runtime; voice = evoice->number; stereo = (runtime->channels == 2); sample = snd_pcm_format_width(runtime->format) == 16 ? 0 : 0x80808080; - ccis = emu10k1_ccis(stereo, sample == 0); - /* set cs to 2 * number of cache registers beside the invalidated */ - cs = (sample == 0) ? (32-ccis) : (64-ccis+1) >> 1; - if (cs > 16) cs = 16; - for (i = 0; i < cs; i++) { + + // We assume that the cache is resting at this point (i.e., + // CCR_CACHEINVALIDSIZE is very small). + + // Clear leading frames. For simplicitly, this does too much, + // except for 16-bit stereo. And the interpolator will actually + // access them at all only when we're pitch-shifting. + for (int i = 0; i < 3; i++) snd_emu10k1_ptr_write(emu, CD0 + i, voice, sample); - if (stereo) { - snd_emu10k1_ptr_write(emu, CD0 + i, voice + 1, sample); - } - } - /* reset cache */ - snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, 0); - snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice, cra); + + // Fill cache + ccr = (64 - 3) << REG_SHIFT(CCR_CACHEINVALIDSIZE); if (stereo) { - snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice + 1, 0); - // The engine goes haywire if this one is out of sync - snd_emu10k1_ptr_write(emu, CCR_READADDRESS, voice + 1, cra); - } - /* fill cache */ - snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice, ccis); - if (stereo) { - snd_emu10k1_ptr_write(emu, CCR_CACHEINVALIDSIZE, voice+1, ccis); + // The engine goes haywire if CCR_READADDRESS is out of sync + snd_emu10k1_ptr_write(emu, CCR, voice + 1, ccr); } + snd_emu10k1_ptr_write(emu, CCR, voice, ccr); } static void snd_emu10k1_playback_commit_volume(struct snd_emu10k1 *emu, -- 2.40.0.152.g15d061e6df