Using poll_state as-is on arm64 seems sub-optimal; wouldn't something
like the below make sense?
---
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 9b6d90a72601..9ab40198b042 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -27,7 +27,11 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
limit = cpuidle_poll_time(drv, dev);
while (!need_resched()) {
- cpu_relax();
+
+ smp_cond_load_relaxed(current_thread_info()->flags,
+ (VAL & TIF_NEED_RESCHED) ||
+ (loop_count++ >= POLL_IDLE_RELAX_COUNT));
+
if (loop_count++ < POLL_IDLE_RELAX_COUNT)
continue;
Thank you for the suggestion. I have tried it, as well as several
variations such as [1] that preserve the original logic,
but I get poor performance compared to the original code:
perf bench sched pipe
# Running 'sched/pipe' benchmark:
# Executed 1000000 pipe operations between two processes
Total time: 136.215 [sec]
136.215229 usecs/op
7341 ops/sec
[1]
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -26,12 +26,16 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
limit = cpuidle_poll_time(drv, dev);
- while (!need_resched()) {
- cpu_relax();
- if (loop_count++ < POLL_IDLE_RELAX_COUNT)
- continue;
-
+ for (;;) {
loop_count = 0;
+
+ smp_cond_load_relaxed(&current_thread_info()->flags,
+ (VAL & TIF_NEED_RESCHED) ||
+ (loop_count++ >= POLL_IDLE_RELAX_COUNT));
+
+ if (loop_count < POLL_IDLE_RELAX_COUNT)
+ break;
+
if (local_clock_noinstr() - time_start > limit) {
dev->poll_time_limit = true;
break;