Hi Chris,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[also build test ERROR on v4.18 next-20180817]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Chris-Wilson/drm-i915-execlists-Micro-optimise-idle-context-switch/20180817-205726
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-s0-201832 (attached as .config)
compiler: gcc-6 (Debian 6.4.0-9) 6.4.0 20171026
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386

All errors (new ones prefixed by >>):

   drivers/gpu//drm/i915/intel_lrc.c: In function 'execlists_dequeue':
>> drivers/gpu//drm/i915/intel_lrc.c:730:9: error: implicit declaration of function 'intel_engine_signaled' [-Werror=implicit-function-declaration]
         intel_engine_signaled(engine,
         ^~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors
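
Note: 'intel_engine_signaled' is not declared anywhere in this tree, so the patch presumably depends on a helper introduced by another patch in the series that is not applied here. As a rough sketch only (the name, header placement and exact definition are assumptions, not quoted from the series), such a helper would likely just ask whether the engine's breadcrumb seqno has already passed the request's global seqno, e.g.:

   /*
    * Hypothetical helper, e.g. in intel_ringbuffer.h, shown only to
    * illustrate the missing declaration; the real series may define it
    * differently. Returns true once the engine's breadcrumb has
    * advanced past @seqno.
    */
   static inline bool intel_engine_signaled(struct intel_engine_cs *engine,
                                            u32 seqno)
   {
           /* i915_seqno_passed() compares seqnos with u32 wraparound in mind */
           return i915_seqno_passed(intel_engine_get_seqno(engine), seqno);
   }

Without a declaration visible from intel_lrc.c, gcc treats the call at line 730 as an implicit declaration, and -Werror=implicit-function-declaration turns that into the build error above.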
vim +/intel_engine_signaled +730 drivers/gpu//drm/i915/intel_lrc.c

   580  
   581  static void execlists_dequeue(struct intel_engine_cs *engine)
   582  {
   583          struct intel_engine_execlists * const execlists = &engine->execlists;
   584          struct execlist_port *port = execlists->port;
   585          const struct execlist_port * const last_port =
   586                  &execlists->port[execlists->port_mask];
   587          struct i915_request *last = port_request(port);
   588          struct rb_node *rb;
   589          bool submit = false;
   590  
   591          /*
   592           * Hardware submission is through 2 ports. Conceptually each port
   593           * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is
   594           * static for a context, and unique to each, so we only execute
   595           * requests belonging to a single context from each ring. RING_HEAD
   596           * is maintained by the CS in the context image, it marks the place
   597           * where it got up to last time, and through RING_TAIL we tell the CS
   598           * where we want to execute up to this time.
   599           *
   600           * In this list the requests are in order of execution. Consecutive
   601           * requests from the same context are adjacent in the ringbuffer. We
   602           * can combine these requests into a single RING_TAIL update:
   603           *
   604           *              RING_HEAD...req1...req2
   605           *                                    ^- RING_TAIL
   606           * since to execute req2 the CS must first execute req1.
   607           *
   608           * Our goal then is to point each port to the end of a consecutive
   609           * sequence of requests as being the most optimal (fewest wake ups
   610           * and context switches) submission.
   611           */
   612  
   613          if (last) {
   614                  /*
   615                   * Don't resubmit or switch until all outstanding
   616                   * preemptions (lite-restore) are seen. Then we
   617                   * know the next preemption status we see corresponds
   618                   * to this ELSP update.
   619                   */
   620                  GEM_BUG_ON(!execlists_is_active(execlists,
   621                                                  EXECLISTS_ACTIVE_USER));
   622                  GEM_BUG_ON(!port_count(&port[0]));
   623  
   624                  /*
   625                   * If we write to ELSP a second time before the HW has had
   626                   * a chance to respond to the previous write, we can confuse
   627                   * the HW and hit "undefined behaviour". After writing to ELSP,
   628                   * we must then wait until we see a context-switch event from
   629                   * the HW to indicate that it has had a chance to respond.
   630                   */
   631                  if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK))
   632                          return;
   633  
   634                  if (need_preempt(engine, last, execlists->queue_priority)) {
   635                          inject_preempt_context(engine);
   636                          return;
   637                  }
   638  
   639                  /*
   640                   * In theory, we could coalesce more requests onto
   641                   * the second port (the first port is active, with
   642                   * no preemptions pending). However, that means we
   643                   * then have to deal with the possible lite-restore
   644                   * of the second port (as we submit the ELSP, there
   645                   * may be a context-switch) but also we may complete
   646                   * the resubmission before the context-switch. Ergo,
   647                   * coalescing onto the second port will cause a
   648                   * preemption event, but we cannot predict whether
   649                   * that will affect port[0] or port[1].
   650                   *
   651                   * If the second port is already active, we can wait
   652                   * until the next context-switch before contemplating
   653                   * new requests. The GPU will be busy and we should be
   654                   * able to resubmit the new ELSP before it idles,
   655                   * avoiding pipeline bubbles (momentary pauses where
   656                   * the driver is unable to keep up the supply of new
   657                   * work). However, we have to double check that the
   658                   * priorities of the ports haven't been switch.
   659                   */
   660                  if (port_count(&port[1]))
   661                          return;
   662  
   663                  /*
   664                   * WaIdleLiteRestore:bdw,skl
   665                   * Apply the wa NOOPs to prevent
   666                   * ring:HEAD == rq:TAIL as we resubmit the
   667                   * request. See gen8_emit_breadcrumb() for
   668                   * where we prepare the padding after the
   669                   * end of the request.
   670                   */
   671                  last->tail = last->wa_tail;
   672          }
   673  
   674          while ((rb = rb_first_cached(&execlists->queue))) {
   675                  struct i915_priolist *p = to_priolist(rb);
   676                  struct i915_request *rq, *rn;
   677  
   678                  list_for_each_entry_safe(rq, rn, &p->requests, sched.link) {
   679                          /*
   680                           * Can we combine this request with the current port?
   681                           * It has to be the same context/ringbuffer and not
   682                           * have any exceptions (e.g. GVT saying never to
   683                           * combine contexts).
   684                           *
   685                           * If we can combine the requests, we can execute both
   686                           * by updating the RING_TAIL to point to the end of the
   687                           * second request, and so we never need to tell the
   688                           * hardware about the first.
   689                           */
   690                          if (last &&
   691                              !can_merge_ctx(rq->hw_context, last->hw_context)) {
   692                                  /*
   693                                   * If we are on the second port and cannot
   694                                   * combine this request with the last, then we
   695                                   * are done.
   696                                   */
   697                                  if (port == last_port) {
   698                                          __list_del_many(&p->requests,
   699                                                          &rq->sched.link);
   700                                          goto done;
   701                                  }
   702  
   703                                  /*
   704                                   * If GVT overrides us we only ever submit
   705                                   * port[0], leaving port[1] empty. Note that we
   706                                   * also have to be careful that we don't queue
   707                                   * the same context (even though a different
   708                                   * request) to the second port.
   709                                   */
   710                                  if (ctx_single_port_submission(last->hw_context) ||
   711                                      ctx_single_port_submission(rq->hw_context)) {
   712                                          __list_del_many(&p->requests,
   713                                                          &rq->sched.link);
   714                                          goto done;
   715                                  }
   716  
   717                                  GEM_BUG_ON(last->hw_context == rq->hw_context);
   718  
   719                                  /*
   720                                   * Avoid reloading the previous context if we
   721                                   * know it has just completed and we want
   722                                   * to switch over to a new context. The CS
   723                                   * interrupt is likely waiting for us to
   724                                   * release the local irq lock and so we will
   725                                   * proceed with the submission momentarily,
   726                                   * which is quicker than reloading the context
   727                                   * on the gpu.
   728                                   */
   729                                  if (!submit &&
 > 730                                      intel_engine_signaled(engine,
   731                                                            last->global_seqno)) {
   732                                          GEM_BUG_ON(!list_is_first(&rq->sched.link,
   733                                                                    &p->requests));
   734                                          return;
   735                                  }
   736  
   737                                  if (submit)
   738                                          port_assign(port, last);
   739                                  port++;
   740  
   741                                  GEM_BUG_ON(port_isset(port));
   742                          }
   743  
   744                          INIT_LIST_HEAD(&rq->sched.link);
   745                          __i915_request_submit(rq);
   746                          trace_i915_request_in(rq, port_index(port, execlists));
   747                          last = rq;
   748                          submit = true;
   749                  }
   750  
   751                  rb_erase_cached(&p->node, &execlists->queue);
   752                  INIT_LIST_HEAD(&p->requests);
   753                  if (p->priority != I915_PRIORITY_NORMAL)
   754                          kmem_cache_free(engine->i915->priorities, p);
   755          }
   756  
   757  done:
   758          /*
   759           * Here be a bit of magic! Or sleight-of-hand, whichever you prefer.
   760           *
   761           * We choose queue_priority such that if we add a request of greater
   762           * priority than this, we kick the submission tasklet to decide on
   763           * the right order of submitting the requests to hardware. We must
   764           * also be prepared to reorder requests as they are in-flight on the
   765           * HW. We derive the queue_priority then as the first "hole" in
   766           * the HW submission ports and if there are no available slots,
   767           * the priority of the lowest executing request, i.e. last.
   768           *
   769           * When we do receive a higher priority request ready to run from the
   770           * user, see queue_request(), the queue_priority is bumped to that
   771           * request triggering preemption on the next dequeue (or subsequent
   772           * interrupt for secondary ports).
   773           */
   774          execlists->queue_priority =
   775                  port != execlists->port ? rq_prio(last) : INT_MIN;
   776  
   777          if (submit) {
   778                  port_assign(port, last);
   779                  execlists_submit_ports(engine);
   780          }
   781  
   782          /* We must always keep the beast fed if we have work piled up */
   783          GEM_BUG_ON(rb_first_cached(&execlists->queue) &&
   784                     !port_isset(execlists->port));
   785  
   786          /* Re-evaluate the executing context setup after each preemptive kick */
   787          if (last)
   788                  execlists_user_begin(execlists, execlists->port);
   789  
   790          /* If the engine is now idle, so should be the flag; and vice versa. */
   791          GEM_BUG_ON(execlists_is_active(&engine->execlists,
   792                                         EXECLISTS_ACTIVE_USER) ==
   793                     !port_isset(engine->execlists.port));
   794  }
   795  

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation