On Tue, Apr 3, 2012 at 11:23 AM, Dave Airlie <airlied@xxxxxxxxx> wrote: > From: Dave Airlie <airlied@xxxxxxxxxx> > > On coherent systems (non-AGP) the IB should be in cached memory, so it should > be just as fast, which means we can avoid copying to temporary pages and use it > directly. > > This provides minor speedups on rv530 (gears: ~1820->1860, ipers: 29.9->30.6), > and it is always good to use less CPU if we can. > > Signed-off-by: Dave Airlie <airlied@xxxxxxxxxx> Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > --- > drivers/gpu/drm/radeon/radeon.h | 1 + > drivers/gpu/drm/radeon/radeon_cs.c | 46 ++++++++++++++++++++++++---------- > drivers/gpu/drm/radeon/radeon_drv.c | 4 +++ > 3 files changed, 37 insertions(+), 14 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 138b952..5331b27 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -94,6 +94,7 @@ extern int radeon_disp_priority; > extern int radeon_hw_i2c; > extern int radeon_pcie_gen2; > extern int radeon_msi; > +extern int radeon_copy1; > > /* > * Copy from radeon_drv.h so we don't have to include both and have conflicting > diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c > index 5cac832..1647be2 100644 > --- a/drivers/gpu/drm/radeon/radeon_cs.c > +++ b/drivers/gpu/drm/radeon/radeon_cs.c > @@ -278,11 +278,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) > p->chunks[p->chunk_ib_idx].length_dw); > return -EINVAL; > } > - p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); > - p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL); > - if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL || > - p->chunks[p->chunk_ib_idx].kpage[1] == NULL) > - return -ENOMEM; > + if ((p->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) { > + p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL); > + p->chunks[p->chunk_ib_idx].kpage[1] = 
kmalloc(PAGE_SIZE, GFP_KERNEL); > + if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL || > + p->chunks[p->chunk_ib_idx].kpage[1] == NULL) { > + kfree(p->chunks[i].kpage[0]); > + kfree(p->chunks[i].kpage[1]); > + return -ENOMEM; > + } > + } > p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1; > p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1; > p->chunks[p->chunk_ib_idx].last_copied_page = -1; > @@ -323,8 +328,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) > kfree(parser->relocs_ptr); > for (i = 0; i < parser->nchunks; i++) { > kfree(parser->chunks[i].kdata); > - kfree(parser->chunks[i].kpage[0]); > - kfree(parser->chunks[i].kpage[1]); > + if ((parser->rdev->flags & RADEON_IS_AGP) || !radeon_copy1) { > + kfree(parser->chunks[i].kpage[0]); > + kfree(parser->chunks[i].kpage[1]); > + } > } > kfree(parser->chunks); > kfree(parser->chunks_array); > @@ -573,6 +580,8 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) > struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx]; > int i; > int size = PAGE_SIZE; > + void *ptr; > + bool copy1 = (p->rdev->flags & RADEON_IS_AGP || !radeon_copy1) ? false : true; > > for (i = ibc->last_copied_page + 1; i < pg_idx; i++) { > if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)), > @@ -583,23 +592,32 @@ int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx) > } > } > > - new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1; > - > if (pg_idx == ibc->last_page_index) { > size = (ibc->length_dw * 4) % PAGE_SIZE; > - if (size == 0) > - size = PAGE_SIZE; > + if (size == 0) > + size = PAGE_SIZE; > } > > - if (DRM_COPY_FROM_USER(ibc->kpage[new_page], > + if (!copy1) { > + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1; > + ptr = ibc->kpage[new_page]; > + } else { > + new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 
0 : 1; > + ptr = p->ib->ptr + (pg_idx * (PAGE_SIZE / 4)); > + ibc->kpage[new_page] = ptr; > + } > + > + if (DRM_COPY_FROM_USER(ptr, > ibc->user_ptr + (pg_idx * PAGE_SIZE), > size)) { > p->parser_error = -EFAULT; > return 0; > } > > - /* copy to IB here */ > - memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size); > + if (!copy1) { > + /* copy to IB here */ > + memcpy((void *)(p->ib->ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size); > + } > > ibc->last_copied_page = pg_idx; > ibc->kpage_idx[new_page] = pg_idx; > diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c > index ef7bb3f..3ea3377 100644 > --- a/drivers/gpu/drm/radeon/radeon_drv.c > +++ b/drivers/gpu/drm/radeon/radeon_drv.c > @@ -128,6 +128,7 @@ int radeon_disp_priority = 0; > int radeon_hw_i2c = 0; > int radeon_pcie_gen2 = 0; > int radeon_msi = -1; > +int radeon_copy1 = 0; > > MODULE_PARM_DESC(no_wb, "Disable AGP writeback for scratch registers"); > module_param_named(no_wb, radeon_no_wb, int, 0444); > @@ -177,6 +178,9 @@ module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444); > MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); > module_param_named(msi, radeon_msi, int, 0444); > > +MODULE_PARM_DESC(copy1, "One Copy test"); > +module_param_named(copy1, radeon_copy1, int, 0644); > + > static int radeon_suspend(struct drm_device *dev, pm_message_t state) > { > drm_radeon_private_t *dev_priv = dev->dev_private; > -- > 1.7.6.5 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel