Add a cache to allow the reuse of SHM segments.
Shared memory segments are added to the cache instead of being
deallocated, and the cache is searched instead of/before allocating a
new segment.
Both the SHM segments and their attachment with the X server are cached.
The cache currently has a fixed number of 10 entries, this provided a
good cache hit rate while keeping memory usage under control.
Building with DEBUG_SHM_CACHE defined and running with
G_MESSAGES_DEBUG=all will periodically print out the SHM cache hit
rate.
On my Ubuntu 18.04 system running XFCE4 with a 2560x1440 screen, the
cache hit rate starts around 72%. On-screen windows that update often
and have consistently-sized damage rectangles are the best case. With
several of those (scrolling terminal windows, web browser showing a
WebGL demo), the hit rate slowly rises to around 92%.
Operations that generate rapid damage reports (like resizing or moving
windows) will lower the hit rate.
Signed-off-by: Brendan Shanks <bshanks@xxxxxxxxxxxxxxx>
---
src/display.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++----
src/display.h | 11 +++-
2 files changed, 170 insertions(+), 16 deletions(-)
diff --git a/src/display.c b/src/display.c
index a2a18b8..55461a2 100644
--- a/src/display.c
+++ b/src/display.c
@@ -213,11 +213,155 @@ static int register_for_events(display_t *d)
return 0;
}
+static void shm_segment_destroy(display_t *d, shm_segment_t *segment)
+{
+ if (segment->shmid == -1) {
+ return;
+ }
+
+ xcb_shm_detach(d->c, segment->shmseg);
+ segment->shmseg = -1;
+
+ shmdt(segment->shmaddr);
+ segment->shmaddr = NULL;
+
+ shmctl(segment->shmid, IPC_RMID, NULL);
+ segment->shmid = -1;
+}
+
+
+static int shm_cache_get(display_t *d, size_t size, shm_segment_t *segment)
+{
+ int i, ret;
+ shm_segment_t *bigger_entry = NULL;
+ shm_segment_t *entry_to_use = NULL;
+
+#if defined(DEBUG_SHM_CACHE)
+ static guint cache_hits = 0;
+ static guint cache_total = 0;
+
+ cache_total++;
+#endif
+
+ g_mutex_lock(&d->shm_cache_mutex);
+
+ /* Check the cache for a segment of size 'size' or bigger.
+ * Use an exact-size segment if found.
+ * If not, use the smallest entry that is big enough.
+ */
+ for (i = 0; i < G_N_ELEMENTS(d->shm_cache); i++) {
+ shm_segment_t *entry = &d->shm_cache[i];
+
+ if (entry->shmid != -1) {
+ /* If a cache entry of the exact size being requested is found,
use that */
+ if (size == entry->size) {
+ entry_to_use = entry;
+ break;
+ }
+
+ /* Keep track of the next-biggest entry in case an exact-size
match isn't found */
+ if (size < entry->size) {
+ if ((bigger_entry && entry->size < bigger_entry->size) ||
!bigger_entry) {
+ smallest_entry = entry;
+ }
+ }
+ }
+
+ /* If no empty entries were found, evict 'smallest_entry' and reuse it
*/
+ if (!entry_to_use && smallest_entry) {
+ shm_segment_destroy(d, smallest_entry);
+ entry_to_use = smallest_entry;
+ }
+
+ if (entry_to_use) {
+ *entry_to_use = *segment;
+ ret = 1;
+ } else {
+ /* Cache is full, and contained no entries smaller than the one
being added */
+ ret = 0;
+ }
+
+ g_mutex_unlock(&d->shm_cache_mutex);
+ return ret;
+}
+
+static void shm_cache_destroy(display_t *d)
+{
+ int i;
+
+ g_mutex_lock(&d->shm_cache_mutex);
+ for (i = 0; i < G_N_ELEMENTS(d->shm_cache); i++) {
+ shm_segment_t *entry = &d->shm_cache[i];
+
+ shm_segment_destroy(d, entry);
+ }
+ g_mutex_unlock(&d->shm_cache_mutex);
+}
int display_open(display_t *d, session_t *session)
{
int scr;
int rc;
+ int i;
xcb_damage_query_version_cookie_t dcookie;
xcb_damage_query_version_reply_t *damage_version;
xcb_xkb_use_extension_cookie_t use_cookie;
@@ -314,6 +458,11 @@ int display_open(display_t *d, session_t *session)
if (rc)
return rc;
+ g_mutex_init(&d->shm_cache_mutex);
+ for (i = 0; i < G_N_ELEMENTS(d->shm_cache); i++) {
+ d->shm_cache[i].shmid = -1;
+ }
+
rc = display_create_screen_images(d);
g_message("Display %s opened", session->options.display ?
session->options.display : "");
@@ -321,17 +470,6 @@ int display_open(display_t *d, session_t *session)
return rc;
}
-/*
- TODO: Implement a cache for shared memory handles
- That is, instead of doing the shmget/shmat for every read,
- we should cache the shmid, and reuse if we're doing a later
- read of a similar size.
-
- We would likely see a 40% improvement in raw read time. Note
- that a callgrind run suggested that would be on the order of 5%
- overall.
-*/
-
shm_image_t *create_shm_image(display_t *d, unsigned int w, unsigned int h)
{
shm_image_t *shmi;
@@ -349,6 +487,11 @@ shm_image_t *create_shm_image(display_t *d, unsigned int
w, unsigned int h)
shmi->bytes_per_line = (bits_per_pixel(d) / 8) * shmi->w;
imgsize = shmi->bytes_per_line * shmi->h;
+ if (shm_cache_get(d, imgsize, &shmi->segment)) {
+ return shmi;
+ }
+
+ /* No usable shared memory segment found in cache, allocate a new one */
shmi->segment.shmid = shmget(IPC_PRIVATE, imgsize, IPC_CREAT | 0700);
if (shmi->segment.shmid != -1)
shmi->segment.shmaddr = shmat(shmi->segment.shmid, 0, 0);
@@ -360,6 +503,7 @@ shm_image_t *create_shm_image(display_t *d, unsigned int
w, unsigned int h)
/* We tell shmctl to detach now; that prevents us from holding this
shared memory segment forever in case of abnormal process exit. */
shmctl(shmi->segment.shmid, IPC_RMID, NULL);
+ shmi->segment.size = imgsize;
shmi->segment.shmseg = xcb_generate_id(d->c);
cookie = xcb_shm_attach_checked(d->c, shmi->segment.shmseg,
shmi->segment.shmid, 0);
@@ -451,9 +595,10 @@ void display_copy_image_into_fullscreen(display_t *d,
shm_image_t *shmi, int x,
void destroy_shm_image(display_t *d, shm_image_t *shmi)
{
- xcb_shm_detach(d->c, shmi->segment.shmseg);
- shmdt(shmi->segment.shmaddr);
- shmctl(shmi->segment.shmid, IPC_RMID, NULL);
+ if (!shm_cache_add(d, &shmi->segment)) {
+ /* Could not add to cache, destroy this segment */
+ shm_segment_destroy(d, &shmi->segment);
+ }
if (shmi->drawable_ptr)
free(shmi->drawable_ptr);
free(shmi);
@@ -521,6 +666,8 @@ void display_stop_event_thread(display_t *d)
void display_close(display_t *d)
{
+ shm_cache_destroy(d);
+ g_mutex_clear(&d->shm_cache_mutex);
xcb_damage_destroy(d->c, d->damage);
display_destroy_screen_images(d);
xcb_disconnect(d->c);
diff --git a/src/display.h b/src/display.h
index da1b991..0aea348 100644
--- a/src/display.h
+++ b/src/display.h
@@ -21,18 +21,19 @@
#ifndef DISPLAY_H_
#define DISPLAY_H_
+#include <glib.h>
#include <xcb/xcb.h>
#include <xcb/damage.h>
#include <xcb/shm.h>
-
struct session_struct;
/*----------------------------------------------------------------------------
** Structure definitions
**--------------------------------------------------------------------------*/
typedef struct {
- int shmid;
+ int shmid; /* if shmid is -1: the shm_segment_t is "empty", other
members are undefined */
+ size_t size;
xcb_shm_seg_t shmseg;
void *shmaddr;
} shm_segment_t;
@@ -63,6 +64,12 @@ typedef struct {
shm_image_t *fullscreen;
shm_image_t *scanline;
+ /* The SHM cache holds up to 10 segments, this provides a good cache
+ * hit rate while keeping memory usage reasonable.
+ */
+ shm_segment_t shm_cache[10];
+ GMutex shm_cache_mutex;
+
pthread_t event_thread;
struct session_struct *session;
} display_t;