summaryrefslogtreecommitdiff
path: root/kernel/sched/tune.c
diff options
context:
space:
mode:
authorRunmin Wang <runminw@codeaurora.org>2016-12-12 15:32:39 -0800
committerRunmin Wang <runminw@codeaurora.org>2016-12-16 13:52:17 -0800
commitefbe378b81e36d9ab6d3a2b3e0e2c3834c6a6528 (patch)
tree87312d7b72f387d44e4804561fc82fddf46fe10d /kernel/sched/tune.c
parenta80e267a8c0d61790c3d1d5f7181ebd1be39c438 (diff)
parent0a9cbce798bca52bb3476cb55ce33da27e100f3e (diff)
Merge branch 'v4.4-16.09-android-tmp' into lsk-v4.4-16.09-android
* v4.4-16.09-android-tmp: unsafe_[get|put]_user: change interface to use a error target label usercopy: remove page-spanning test for now usercopy: fix overlap check for kernel text mm/slub: support left redzone Linux 4.4.21 lib/mpi: mpi_write_sgl(): fix skipping of leading zero limbs regulator: anatop: allow regulator to be in bypass mode hwrng: exynos - Disable runtime PM on probe failure cpufreq: Fix GOV_LIMITS handling for the userspace governor metag: Fix atomic_*_return inline asm constraints scsi: fix upper bounds check of sense key in scsi_sense_key_string() ALSA: timer: fix NULL pointer dereference on memory allocation failure ALSA: timer: fix division by zero after SNDRV_TIMER_IOCTL_CONTINUE ALSA: timer: fix NULL pointer dereference in read()/ioctl() race ALSA: hda - Enable subwoofer on Dell Inspiron 7559 ALSA: hda - Add headset mic quirk for Dell Inspiron 5468 ALSA: rawmidi: Fix possible deadlock with virmidi registration ALSA: fireworks: accessing to user space outside spinlock ALSA: firewire-tascam: accessing to user space outside spinlock ALSA: usb-audio: Add sample rate inquiry quirk for B850V3 CP2114 crypto: caam - fix IV loading for authenc (giv)decryption uprobes: Fix the memcg accounting x86/apic: Do not init irq remapping if ioapic is disabled vhost/scsi: fix reuse of &vq->iov[out] in response bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two. ubifs: Fix assertion in layout_in_gaps() ovl: fix workdir creation ovl: listxattr: use strnlen() ovl: remove posix_acl_default from workdir ovl: don't copy up opaqueness wrappers for ->i_mutex access lustre: remove unused declaration timekeeping: Avoid taking lock in NMI path with CONFIG_DEBUG_TIMEKEEPING timekeeping: Cap array access in timekeeping_debug xfs: fix superblock inprogress check ASoC: atmel_ssc_dai: Don't unconditionally reset SSC on stream startup drm/msm: fix use of copy_from_user() while holding spinlock drm: Reject page_flip for !DRIVER_MODESET drm/radeon: fix radeon_move_blit on 32bit systems s390/sclp_ctl: fix potential information leak with /dev/sclp rds: fix an infoleak in rds_inc_info_copy powerpc/tm: Avoid SLB faults in treclaim/trecheckpoint when RI=0 nvme: Call pci_disable_device on the error path. cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork block: make sure a big bio is split into at most 256 bvecs block: Fix race triggered by blk_set_queue_dying() ext4: avoid modifying checksum fields directly during checksum verification ext4: avoid deadlock when expanding inode size ext4: properly align shifted xattrs when expanding inodes ext4: fix xattr shifting when expanding inodes part 2 ext4: fix xattr shifting when expanding inodes ext4: validate that metadata blocks do not overlap superblock net: Use ns_capable_noaudit() when determining net sysctl permissions kernel: Add noaudit variant of ns_capable() KEYS: Fix ASN.1 indefinite length object parsing drivers:hv: Lock access to hyperv_mmio resource tree cxlflash: Move to exponential back-off when cmd_room is not available netfilter: x_tables: check for size overflow drm/amdgpu/cz: enable/disable vce dpm even if vce pg is disabled cred: Reject inodes with invalid ids in set_create_file_as() fs: Check for invalid i_uid in may_follow_link() IB/IPoIB: Do not set skb truesize since using one linearskb udp: properly support MSG_PEEK with truncated buffers crypto: nx-842 - Mask XERS0 bit in return value cxlflash: Fix to avoid virtual LUN failover failure cxlflash: Fix to escalate LINK_RESET also on port 1 tipc: fix nl compat regression for link statistics tipc: fix an infoleak in tipc_nl_compat_link_dump netfilter: x_tables: check for size overflow Bluetooth: Add support for Intel Bluetooth device 8265 [8087:0a2b] drm/i915: Check VBT for port presence in addition to the strap on VLV/CHV drm/i915: Only ignore eDP ports that are connected Input: xpad - move pending clear to the correct location net: thunderx: Fix link status reporting x86/hyperv: Avoid reporting bogus NMI status for Gen2 instances crypto: vmx - IV size failing on skcipher API tda10071: Fix dependency to REGMAP_I2C crypto: vmx - Fix ABI detection crypto: vmx - comply with ABIs that specify vrsave as reserved. HID: core: prevent out-of-bound readings lpfc: Fix DMA faults observed upon plugging loopback connector block: fix blk_rq_get_max_sectors for driver private requests irqchip/gicv3-its: numa: Enable workaround for Cavium thunderx erratum 23144 clocksource: Allow unregistering the watchdog btrfs: Continue write in case of can_not_nocow blk-mq: End unstarted requests on dying queue cxlflash: Fix to resolve dead-lock during EEH recovery drm/radeon/mst: fix regression in lane/link handling. ecryptfs: fix handling of directory opening ALSA: hda: add AMD Polaris-10/11 AZ PCI IDs with proper driver caps drm: Balance error path for GEM handle allocation ntp: Fix ADJ_SETOFFSET being used w/ ADJ_NANO time: Verify time values in adjtimex ADJ_SETOFFSET to avoid overflow Input: xpad - correctly handle concurrent LED and FF requests net: thunderx: Fix receive packet stats net: thunderx: Fix for multiqset not configured upon interface toggle perf/x86/cqm: Fix CQM memory leak and notifier leak perf/x86/cqm: Fix CQM handling of grouping events into a cache_group s390/crypto: provide correct file mode at device register. proc: revert /proc/<pid>/maps [stack:TID] annotation intel_idle: Support for Intel Xeon Phi Processor x200 Product Family cxlflash: Fix to avoid unnecessary scan with internal LUNs Drivers: hv: vmbus: don't manipulate with clocksources on crash Drivers: hv: vmbus: avoid scheduling in interrupt context in vmbus_initiate_unload() Drivers: hv: vmbus: avoid infinite loop in init_vp_index() arcmsr: fixes not release allocated resource arcmsr: fixed getting wrong configuration data s390/pci_dma: fix DMA table corruption with > 4 TB main memory net/mlx5e: Don't modify CQ before it was created net/mlx5e: Don't try to modify CQ moderation if it is not supported mmc: sdhci: Do not BUG on invalid vdd UVC: Add support for R200 depth camera sched/numa: Fix use-after-free bug in the task_numa_compare ALSA: hda - add codec support for Kabylake display audio codec drm/i915: Fix hpd live status bits for g4x tipc: fix nullptr crash during subscription cancel arm64: Add workaround for Cavium erratum 27456 net: thunderx: Fix for Qset error due to CQ full drm/radeon: fix dp link rate selection (v2) drm/amdgpu: fix dp link rate selection (v2) qla2xxx: Use ATIO type to send correct tmr response mmc: sdhci: 64-bit DMA actually has 4-byte alignment drm/atomic: Do not unset crtc when an encoder is stolen drm/i915/skl: Add missing SKL ids drm/i915/bxt: update list of PCIIDs hrtimer: Catch illegal clockids i40e/i40evf: Fix RSS rx-flow-hash configuration through ethtool mpt3sas: Fix for Asynchronous completion of timedout IO and task abort of timedout IO. mpt3sas: A correction in unmap_resources net: cavium: liquidio: fix check for in progress flag arm64: KVM: Configure TCR_EL2.PS at runtime irqchip/gic-v3: Make sure read from ICC_IAR1_EL1 is visible on redestributor pwm: lpc32xx: fix and simplify duty cycle and period calculations pwm: lpc32xx: correct number of PWM channels from 2 to 1 pwm: fsl-ftm: Fix clock enable/disable when using PM megaraid_sas: Add an i/o barrier megaraid_sas: Fix SMAP issue megaraid_sas: Do not allow PCI access during OCR s390/cio: update measurement characteristics s390/cio: ensure consistent measurement state s390/cio: fix measurement characteristics memleak qeth: initialize net_device with carrier off lpfc: Fix external loopback failure. lpfc: Fix mbox reuse in PLOGI completion lpfc: Fix RDP Speed reporting. lpfc: Fix crash in fcp command completion path. lpfc: Fix driver crash when module parameter lpfc_fcp_io_channel set to 16 lpfc: Fix RegLogin failed error seen on Lancer FC during port bounce lpfc: Fix the FLOGI discovery logic to comply with T11 standards lpfc: Fix FCF Infinite loop in lpfc_sli4_fcf_rr_next_index_get. cxl: Enable PCI device ID for future IBM CXL adapter cxl: fix build for GCC 4.6.x cxlflash: Enable device id for future IBM CXL adapter cxlflash: Resolve oops in wait_port_offline cxlflash: Fix to resolve cmd leak after host reset cxl: Fix DSI misses when the context owning task exits cxl: Fix possible idr warning when contexts are released Drivers: hv: vmbus: fix rescind-offer handling for device without a driver Drivers: hv: vmbus: serialize process_chn_event() and vmbus_close_internal() Drivers: hv: vss: run only on supported host versions drivers/hv: cleanup synic msrs if vmbus connect failed Drivers: hv: util: catch allocation errors tools: hv: report ENOSPC errors in hv_fcopy_daemon Drivers: hv: utils: run polling callback always in interrupt context Drivers: hv: util: Increase the timeout for util services lightnvm: fix missing grown bad block type lightnvm: fix locking and mempool in rrpc_lun_gc lightnvm: unlock rq and free ppa_list on submission fail lightnvm: add check after mempool allocation lightnvm: fix incorrect nr_free_blocks stat lightnvm: fix bio submission issue cxlflash: a couple off by one bugs fm10k: Cleanup exception handling for mailbox interrupt fm10k: Cleanup MSI-X interrupts in case of failure fm10k: reinitialize queuing scheme after calling init_hw fm10k: always check init_hw for errors fm10k: reset max_queues on init_hw_vf failure fm10k: Fix handling of NAPI budget when multiple queues are enabled per vector fm10k: Correct MTU for jumbo frames fm10k: do not assume VF always has 1 queue clk: xgene: Fix divider with non-zero shift value e1000e: fix division by zero on jumbo MTUs e1000: fix data race between tx_ring->next_to_clean ixgbe: Fix handling of NAPI budget when multiple queues are enabled per vector igb: fix NULL derefs due to skipped SR-IOV enabling igb: use the correct i210 register for EEMNGCTL igb: don't unmap NULL hw_addr i40e: Fix Rx hash reported to the stack by our driver i40e: clean whole mac filter list i40evf: check rings before freeing resources i40e: don't add zero MAC filter i40e: properly delete VF MAC filters i40e: Fix memory leaks, sideband filter programming i40e: fix: do not sleep in netdev_ops i40e/i40evf: Fix RS bit update in Tx path and disable force WB workaround i40evf: handle many MAC filters correctly i40e: Workaround fix for mss < 256 issue UPSTREAM: audit: fix a double fetch in audit_log_single_execve_arg() UPSTREAM: ARM: 8494/1: mm: Enable PXN when running non-LPAE kernel on LPAE processor FIXUP: sched/tune: update accouting before CPU capacity FIXUP: sched/tune: add fixes missing from a previous patch arm: Fix #if/#ifdef typo in topology.c arm: Fix build error "conflicting types for 'scale_cpu_capacity'" sched/walt: use do_div instead of division operator DEBUG: cpufreq: fix cpu_capacity tracing build for non-smp systems sched/walt: include missing header for arm_timer_read_counter() cpufreq: Kconfig: Fixup incorrect selection by CPU_FREQ_DEFAULT_GOV_SCHED sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats() FIXUP: sched: scheduler-driven cpu frequency selection sched/rt: Add Kconfig option to enable panicking for RT throttling sched/rt: print RT tasks when RT throttling is activated UPSTREAM: sched: Fix a race between __kthread_bind() and sched_setaffinity() sched/fair: Favor higher cpus only for boosted tasks vmstat: make vmstat_updater deferrable again and shut down on idle sched/fair: call OPP update when going idle after migration sched/cpufreq_sched: fix thermal capping events sched/fair: Picking cpus with low OPPs for tasks that prefer idle CPUs FIXUP: sched/tune: do initialization as a postcore_initicall DEBUG: sched: add tracepoint for RD overutilized sched/tune: Introducing a new schedtune attribute prefer_idle sched: use util instead of capacity to select busy cpu arch_timer: add error handling when the MPM global timer is cleared FIXUP: sched: Fix double-release of spinlock in move_queued_task FIXUP: sched/fair: Fix hang during suspend in sched_group_energy FIXUP: sched: fix SchedFreq integration for both PELT and WALT sched: EAS: Avoid causing spikes to max-freq unnecessarily FIXUP: sched: fix set_cfs_cpu_capacity when WALT is in use sched/walt: Accounting for number of irqs pending on each core sched: Introduce Window Assisted Load Tracking (WALT) sched/tune: fix PB and PC cuts indexes definition sched/fair: optimize idle cpu selection for boosted tasks FIXUP: sched/tune: fix accounting for runnable tasks sched/tune: use a single initialisation function sched/{fair,tune}: simplify fair.c code FIXUP: sched/tune: fix payoff calculation for boost region sched/tune: Add support for negative boost values FIX: sched/tune: move schedtune_nornalize_energy into fair.c FIX: sched/tune: update usage of boosted task utilisation on CPU selection sched/fair: add tunable to set initial task load sched/fair: add tunable to force selection at cpu granularity sched: EAS: take cstate into account when selecting idle core sched/cpufreq_sched: Consolidated update FIXUP: sched: fix build for non-SMP target DEBUG: sched/tune: add tracepoint on P-E space filtering DEBUG: sched/tune: add tracepoint for energy_diff() values DEBUG: sched/tune: add tracepoint for task boost signal arm: topology: Define TC2 energy and provide it to the scheduler CHROMIUM: sched: update the average of nr_running DEBUG: schedtune: add tracepoint for schedtune_tasks_update() values DEBUG: schedtune: add tracepoint for CPU boost signal DEBUG: schedtune: add tracepoint for SchedTune configuration update DEBUG: sched: add energy procfs interface DEBUG: sched,cpufreq: add cpu_capacity change tracepoint DEBUG: sched: add tracepoint for CPU load/util signals DEBUG: sched: add tracepoint for task load/util signals DEBUG: sched: add tracepoint for cpu/freq scale invariance sched/fair: filter energy_diff() based on energy_payoff value sched/tune: add support to compute normalized energy sched/fair: keep track of energy/capacity variations sched/fair: add boosted task utilization sched/{fair,tune}: track RUNNABLE tasks impact on per CPU boost value sched/tune: compute and keep track of per CPU boost value sched/tune: add initial support for CGroups based boosting sched/fair: add boosted CPU usage sched/fair: add function to convert boost value into "margin" sched/tune: add sysctl interface to define a boost value sched/tune: add detailed documentation fixup! sched/fair: jump to max OPP when crossing UP threshold fixup! sched: scheduler-driven cpu frequency selection sched: rt scheduler sets capacity requirement sched: deadline: use deadline bandwidth in scale_rt_capacity sched: remove call of sched_avg_update from sched_rt_avg_update sched/cpufreq_sched: add trace events sched/fair: jump to max OPP when crossing UP threshold sched/fair: cpufreq_sched triggers for load balancing sched/{core,fair}: trigger OPP change request on fork() sched/fair: add triggers for OPP change requests sched: scheduler-driven cpu frequency selection cpufreq: introduce cpufreq_driver_is_slow sched: Consider misfit tasks when load-balancing sched: Add group_misfit_task load-balance type sched: Add per-cpu max capacity to sched_group_capacity sched: Do eas idle balance regardless of the rq avg idle value arm64: Enable max freq invariant scheduler load-tracking and capacity support arm: Enable max freq invariant scheduler load-tracking and capacity support sched: Update max cpu capacity in case of max frequency constraints cpufreq: Max freq invariant scheduler load-tracking and cpu capacity support arm64, topology: Updates to use DT bindings for EAS costing data sched: Support for extracting EAS energy costs from DT Documentation: DT bindings for energy model cost data required by EAS sched: Disable energy-unfriendly nohz kicks sched: Consider a not over-utilized energy-aware system as balanced sched: Energy-aware wake-up task placement sched: Determine the current sched_group idle-state sched, cpuidle: Track cpuidle state index in the scheduler sched: Add over-utilization/tipping point indicator sched: Estimate energy impact of scheduling decisions sched: Extend sched_group_energy to test load-balancing decisions sched: Calculate energy consumption of sched_group sched: Highest energy aware balancing sched_domain level pointer sched: Relocated cpu_util() and change return type sched: Compute cpu capacity available at current frequency arm64: Cpu invariant scheduler load-tracking and capacity support arm: Cpu invariant scheduler load-tracking and capacity support sched: Introduce SD_SHARE_CAP_STATES sched_domain flag sched: Initialize energy data structures sched: Introduce energy data structures sched: Make energy awareness a sched feature sched: Documentation for scheduler energy cost model sched: Prevent unnecessary active balance of single task in sched group sched: Enable idle balance to pull single task towards cpu with higher capacity sched: Consider spare cpu capacity at task wake-up sched: Add cpu capacity awareness to wakeup balancing sched: Store system-wide maximum cpu capacity in root domain arm: Update arch_scale_cpu_capacity() to reflect change to define arm64: Enable frequency invariant scheduler load-tracking support arm: Enable frequency invariant scheduler load-tracking support cpufreq: Frequency invariant scheduler load-tracking support sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task() FROMLIST: pstore: drop pmsg bounce buffer UPSTREAM: usercopy: remove page-spanning test for now UPSTREAM: usercopy: force check_object_size() inline BACKPORT: usercopy: fold builtin_const check into inline function UPSTREAM: x86/uaccess: force copy_*_user() to be inlined UPSTREAM: HID: core: prevent out-of-bound readings Android: Fix build breakages. UPSTREAM: tty: Prevent ldisc drivers from re-using stale tty fields UPSTREAM: netfilter: nfnetlink: correctly validate length of batch messages cpuset: Make cpusets restore on hotplug UPSTREAM: mm/slub: support left redzone UPSTREAM: Make the hardened user-copy code depend on having a hardened allocator Android: MMC/UFS IO Latency Histograms. UPSTREAM: usercopy: fix overlap check for kernel text UPSTREAM: usercopy: avoid potentially undefined behavior in pointer math UPSTREAM: unsafe_[get|put]_user: change interface to use a error target label BACKPORT: arm64: mm: fix location of _etext BACKPORT: ARM: 8583/1: mm: fix location of _etext BACKPORT: Don't show empty tag stats for unprivileged uids UPSTREAM: tcp: fix use after free in tcp_xmit_retransmit_queue() ANDROID: base-cfg: drop SECCOMP_FILTER config UPSTREAM: [media] xc2028: unlock on error in xc2028_set_config() UPSTREAM: [media] xc2028: avoid use after free ANDROID: base-cfg: enable SECCOMP config ANDROID: rcu_sync: Export rcu_sync_lockdep_assert RFC: FROMLIST: cgroup: reduce read locked section of cgroup_threadgroup_rwsem during fork RFC: FROMLIST: cgroup: avoid synchronize_sched() in __cgroup_procs_write() RFC: FROMLIST: locking/percpu-rwsem: Optimize readers and reduce global impact net: ipv6: Fix ping to link-local addresses. ipv6: fix endianness error in icmpv6_err ANDROID: dm: android-verity: Allow android-verity to be compiled as an independent module backporting: a brief introduce of backported feautures on 4.4 Linux 4.4.20 sysfs: correctly handle read offset on PREALLOC attrs hwmon: (iio_hwmon) fix memory leak in name attribute ALSA: line6: Fix POD sysfs attributes segfault ALSA: line6: Give up on the lock while URBs are released. ALSA: line6: Remove double line6_pcm_release() after failed acquire. ACPI / SRAT: fix SRAT parsing order with both LAPIC and X2APIC present ACPI / sysfs: fix error code in get_status() ACPI / drivers: replace acpi_probe_lock spinlock with mutex ACPI / drivers: fix typo in ACPI_DECLARE_PROBE_ENTRY macro staging: comedi: ni_mio_common: fix wrong insn_write handler staging: comedi: ni_mio_common: fix AO inttrig backwards compatibility staging: comedi: comedi_test: fix timer race conditions staging: comedi: daqboard2000: bug fix board type matching code USB: serial: option: add WeTelecom 0x6802 and 0x6803 products USB: serial: option: add WeTelecom WM-D200 USB: serial: mos7840: fix non-atomic allocation in write path USB: serial: mos7720: fix non-atomic allocation in write path USB: fix typo in wMaxPacketSize validation usb: chipidea: udc: don't touch DP when controller is in host mode USB: avoid left shift by -1 dmaengine: usb-dmac: check CHCR.DE bit in usb_dmac_isr_channel() crypto: qat - fix aes-xts key sizes crypto: nx - off by one bug in nx_of_update_msc() Input: i8042 - set up shared ps2_cmd_mutex for AUX ports Input: i8042 - break load dependency between atkbd/psmouse and i8042 Input: tegra-kbc - fix inverted reset logic btrfs: properly track when rescan worker is running btrfs: waiting on qgroup rescan should not always be interruptible fs/seq_file: fix out-of-bounds read gpio: Fix OF build problem on UM usb: renesas_usbhs: gadget: fix return value check in usbhs_mod_gadget_probe() megaraid_sas: Fix probing cards without io port mpt3sas: Fix resume on WarpDrive flash cards cdc-acm: fix wrong pipe type on rx interrupt xfers i2c: cros-ec-tunnel: Fix usage of cros_ec_cmd_xfer() mfd: cros_ec: Add cros_ec_cmd_xfer_status() helper aacraid: Check size values after double-fetch from user ARC: Elide redundant setup of DMA callbacks ARC: Call trace_hardirqs_on() before enabling irqs ARC: use correct offset in pt_regs for saving/restoring user mode r25 ARC: build: Better way to detect ISA compatible toolchain drm/i915: fix aliasing_ppgtt leak drm/amdgpu: record error code when ring test failed drm/amd/amdgpu: sdma resume fail during S4 on CI drm/amdgpu: skip TV/CV in display parsing drm/amdgpu: avoid a possible array overflow drm/amdgpu: fix amdgpu_move_blit on 32bit systems drm/amdgpu: Change GART offset to 64-bit iio: fix sched WARNING "do not call blocking ops when !TASK_RUNNING" sched/nohz: Fix affine unpinned timers mess sched/cputime: Fix NO_HZ_FULL getrusage() monotonicity regression of: fix reference counting in of_graph_get_endpoint_by_regs arm64: dts: rockchip: add reset saradc node for rk3368 SoCs mac80211: fix purging multicast PS buffer queue s390/dasd: fix hanging device after clear subchannel EDAC: Increment correct counter in edac_inc_ue_error() pinctrl/amd: Remove the default de-bounce time iommu/arm-smmu: Don't BUG() if we find aborting STEs with disable_bypass iommu/arm-smmu: Fix CMDQ error handling iommu/dma: Don't put uninitialised IOVA domains xhci: Make sure xhci handles USB_SPEED_SUPER_PLUS devices. USB: serial: ftdi_sio: add PIDs for Ivium Technologies devices USB: serial: ftdi_sio: add device ID for WICED USB UART dev board USB: serial: option: add support for Telit LE920A4 USB: serial: option: add D-Link DWM-156/A3 USB: serial: fix memleak in driver-registration error path xhci: don't dereference a xhci member after removing xhci usb: xhci: Fix panic if disconnect xhci: always handle "Command Ring Stopped" events usb/gadget: fix gadgetfs aio support. usb: gadget: fsl_qe_udc: off by one in setup_received_handle() USB: validate wMaxPacketValue entries in endpoint descriptors usb: renesas_usbhs: Use dmac only if the pipe type is bulk usb: renesas_usbhs: clear the BRDYSTS in usbhsg_ep_enable() USB: hub: change the locking in hub_activate USB: hub: fix up early-exit pathway in hub_activate usb: hub: Fix unbalanced reference count/memory leak/deadlocks usb: define USB_SPEED_SUPER_PLUS speed for SuperSpeedPlus USB3.1 devices usb: dwc3: gadget: increment request->actual once usb: dwc3: pci: add Intel Kabylake PCI ID usb: misc: usbtest: add fix for driver hang usb: ehci: change order of register cleanup during shutdown crypto: caam - defer aead_set_sh_desc in case of zero authsize crypto: caam - fix echainiv(authenc) encrypt shared descriptor crypto: caam - fix non-hmac hashes genirq/msi: Make sure PCI MSIs are activated early genirq/msi: Remove unused MSI_FLAG_IDENTITY_MAP um: Don't discard .text.exit section ACPI / CPPC: Prevent cpc_desc_ptr points to the invalid data ACPI: CPPC: Return error if _CPC is invalid on a CPU mmc: sdhci-acpi: Reduce Baytrail eMMC/SD/SDIO hangs PCI: Limit config space size for Netronome NFP4000 PCI: Add Netronome NFP4000 PF device ID PCI: Limit config space size for Netronome NFP6000 family PCI: Add Netronome vendor and device IDs PCI: Support PCIe devices with short cfg_size NVMe: Don't unmap controller registers on reset ALSA: hda - Manage power well properly for resume libnvdimm, nd_blk: mask off reserved status bits perf intel-pt: Fix occasional decoding errors when tracing system-wide vfio/pci: Fix NULL pointer oops in error interrupt setup handling virtio: fix memory leak in virtqueue_add() parisc: Fix order of EREFUSED define in errno.h arm64: Define AT_VECTOR_SIZE_ARCH for ARCH_DLINFO ALSA: usb-audio: Add quirk for ELP HD USB Camera ALSA: usb-audio: Add a sample rate quirk for Creative Live! Cam Socialize HD (VF0610) powerpc/eeh: eeh_pci_enable(): fix checking of post-request state SUNRPC: allow for upcalls for same uid but different gss service SUNRPC: Handle EADDRNOTAVAIL on connection failures tools/testing/nvdimm: fix SIGTERM vs hotplug crash uprobes/x86: Fix RIP-relative handling of EVEX-encoded instructions x86/mm: Disable preemption during CR3 read+write hugetlb: fix nr_pmds accounting with shared page tables mm: SLUB hardened usercopy support mm: SLAB hardened usercopy support s390/uaccess: Enable hardened usercopy sparc/uaccess: Enable hardened usercopy powerpc/uaccess: Enable hardened usercopy ia64/uaccess: Enable hardened usercopy arm64/uaccess: Enable hardened usercopy ARM: uaccess: Enable hardened usercopy x86/uaccess: Enable hardened usercopy x86: remove more uaccess_32.h complexity x86: remove pointless uaccess_32.h complexity x86: fix SMAP in 32-bit environments Use the new batched user accesses in generic user string handling Add 'unsafe' user access functions for batched accesses x86: reorganize SMAP handling in user space accesses mm: Hardened usercopy mm: Implement stack frame object validation mm: Add is_migrate_cma_page Linux 4.4.19 Documentation/module-signing.txt: Note need for version info if reusing a key module: Invalidate signatures on force-loaded modules dm flakey: error READ bios during the down_interval rtc: s3c: Add s3c_rtc_{enable/disable}_clk in s3c_rtc_setfreq() lpfc: fix oops in lpfc_sli4_scmd_to_wqidx_distr() from lpfc_send_taskmgmt() ACPI / EC: Work around method reentrancy limit in ACPICA for _Qxx x86/platform/intel_mid_pci: Rework IRQ0 workaround PCI: Mark Atheros AR9485 and QCA9882 to avoid bus reset MIPS: hpet: Increase HPET_MIN_PROG_DELTA and decrease HPET_MIN_CYCLES MIPS: Don't register r4k sched clock when CPUFREQ enabled MIPS: mm: Fix definition of R6 cache instruction SUNRPC: Don't allocate a full sockaddr_storage for tracing Input: elan_i2c - properly wake up touchpad on ASUS laptops target: Fix ordered task CHECK_CONDITION early exception handling target: Fix max_unmap_lba_count calc overflow target: Fix race between iscsi-target connection shutdown + ABORT_TASK target: Fix missing complete during ABORT_TASK + CMD_T_FABRIC_STOP target: Fix ordered task target_setup_cmd_from_cdb exception hang iscsi-target: Fix panic when adding second TCP connection to iSCSI session ubi: Fix race condition between ubi device creation and udev ubi: Fix early logging ubi: Make volume resize power cut aware of: fix memory leak related to safe_name() IB/mlx4: Fix memory leak if QP creation failed IB/mlx4: Fix error flow when sending mads under SRIOV IB/mlx4: Fix the SQ size of an RC QP IB/IWPM: Fix a potential skb leak IB/IPoIB: Don't update neigh validity for unresolved entries IB/SA: Use correct free function IB/mlx5: Return PORT_ERR in Active to Initializing tranisition IB/mlx5: Fix post send fence logic IB/mlx5: Fix entries check in mlx5_ib_resize_cq IB/mlx5: Fix returned values of query QP IB/mlx5: Fix entries checks in mlx5_ib_create_cq IB/mlx5: Fix MODIFY_QP command input structure ALSA: hda - Fix headset mic detection problem for two dell machines ALSA: hda: add AMD Bonaire AZ PCI ID with proper driver caps ALSA: hda/realtek - Can't adjust speaker's volume on a Dell AIO ALSA: hda: Fix krealloc() with __GFP_ZERO usage mm/hugetlb: avoid soft lockup in set_max_huge_pages() mtd: nand: fix bug writing 1 byte less than page size block: fix bdi vs gendisk lifetime mismatch block: add missing group association in bio-cloning functions metag: Fix __cmpxchg_u32 asm constraint for CMP ftrace/recordmcount: Work around for addition of metag magic but not relocations balloon: check the number of available pages in leak balloon drm/i915/dp: Revert "drm/i915/dp: fall back to 18 bpp when sink capability is unknown" drm/i915: Never fully mask the the EI up rps interrupt on SNB/IVB drm/edid: Add 6 bpc quirk for display AEO model 0. drm: Restore double clflush on the last partial cacheline drm/nouveau/fbcon: fix font width not divisible by 8 drm/nouveau/gr/nv3x: fix instobj write offsets in gr setup drm/nouveau: check for supported chipset before booting fbdev off the hw drm/radeon: support backlight control for UNIPHY3 drm/radeon: fix firmware info version checks drm/radeon: Poll for both connect/disconnect on analog connectors drm/radeon: add a delay after ATPX dGPU power off drm/amdgpu/gmc7: add missing mullins case drm/amdgpu: fix firmware info version checks drm/amdgpu: Disable RPM helpers while reprobing connectors on resume drm/amdgpu: support backlight control for UNIPHY3 drm/amdgpu: Poll for both connect/disconnect on analog connectors drm/amdgpu: add a delay after ATPX dGPU power off w1:omap_hdq: fix regression netlabel: add address family checks to netlbl_{sock,req}_delattr() ARM: dts: sunxi: Add a startup delay for fixed regulator enabled phys audit: fix a double fetch in audit_log_single_execve_arg() iommu/amd: Update Alias-DTE in update_device_table() iommu/amd: Init unity mappings only for dma_ops domains iommu/amd: Handle IOMMU_DOMAIN_DMA in ops->domain_free call-back iommu/vt-d: Return error code in domain_context_mapping_one() iommu/exynos: Suppress unbinding to prevent system failure drm/i915: Don't complain about lack of ACPI video bios nfsd: don't return an unhashed lock stateid after taking mutex nfsd: Fix race between FREE_STATEID and LOCK nfs: don't create zero-length requests MIPS: KVM: Propagate kseg0/mapped tlb fault errors MIPS: KVM: Fix gfn range check in kseg0 tlb faults MIPS: KVM: Add missing gfn range check MIPS: KVM: Fix mapped fault broken commpage handling random: add interrupt callback to VMBus IRQ handler random: print a warning for the first ten uninitialized random users random: initialize the non-blocking pool via add_hwgenerator_randomness() CIFS: Fix a possible invalid memory access in smb2_query_symlink() cifs: fix crash due to race in hmac(md5) handling cifs: Check for existing directory when opening file with O_CREAT fs/cifs: make share unaccessible at root level mountable jbd2: make journal y2038 safe ARC: mm: don't loose PTE_SPECIAL in pte_modify() remoteproc: Fix potential race condition in rproc_add ovl: disallow overlayfs as upperdir HID: uhid: fix timeout when probe races with IO EDAC: Correct channel count limit Bluetooth: Fix l2cap_sock_setsockopt() with optname BT_RCVMTU spi: pxa2xx: Clear all RFT bits in reset_sccr1() on Intel Quark i2c: efm32: fix a failure path in efm32_i2c_probe() s5p-mfc: Add release callback for memory region devs s5p-mfc: Set device name for reserved memory region devs hp-wmi: Fix wifi cannot be hard-unblocked dm: set DMF_SUSPENDED* _before_ clearing DMF_NOFLUSH_SUSPENDING sur40: fix occasional oopses on device close sur40: lower poll interval to fix occasional FPS drops to ~56 FPS Fix RC5 decoding with Fintek CIR chipset vb2: core: Skip planes array verification if pb is NULL videobuf2-v4l2: Verify planes array in buffer dequeueing media: dvb_ringbuffer: Add memory barriers media: usbtv: prevent access to free'd resources mfd: qcom_rpm: Parametrize also ack selector size mfd: qcom_rpm: Fix offset error for msm8660 intel_pstate: Fix MSR_CONFIG_TDP_x addressing in core_get_max_pstate() s390/cio: allow to reset channel measurement block KVM: nVMX: Fix memory corruption when using VMCS shadowing KVM: VMX: handle PML full VMEXIT that occurs during event delivery KVM: MTRR: fix kvm_mtrr_check_gfn_range_consistency page fault KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures arm64: mm: avoid fdt_check_header() before the FDT is fully mapped arm64: dts: rockchip: fixes the gic400 2nd region size for rk3368 pinctrl: cherryview: prevent concurrent access to GPIO controllers Bluetooth: hci_intel: Fix null gpio desc pointer dereference gpio: intel-mid: Remove potentially harmful code gpio: pca953x: Fix NBANK calculation for PCA9536 tty/serial: atmel: fix RS485 half duplex with DMA serial: samsung: Fix ERR pointer dereference on deferred probe tty: serial: msm: Don't read off end of tx fifo arm64: Fix incorrect per-cpu usage for boot CPU arm64: debug: unmask PSTATE.D earlier arm64: kernel: Save and restore UAO and addr_limit on exception entry USB: usbfs: fix potential infoleak in devio usb: renesas_usbhs: fix NULL pointer dereference in xfer_work() USB: serial: option: add support for Telit LE910 PID 0x1206 usb: dwc3: fix for the isoc transfer EP_BUSY flag usb: quirks: Add no-lpm quirk for Elan usb: renesas_usbhs: protect the CFIFOSEL setting in usbhsg_ep_enable() usb: f_fs: off by one bug in _ffs_func_bind() usb: gadget: avoid exposing kernel stack UPSTREAM: usb: gadget: configfs: add mutex lock before unregister gadget ANDROID: dm-verity: adopt changes made to dm callbacks UPSTREAM: ecryptfs: fix handling of directory opening ANDROID: net: core: fix UID-based routing ANDROID: net: fib: remove duplicate assignment FROMLIST: proc: Fix timerslack_ns CAP_SYS_NICE check when adjusting self ANDROID: dm verity fec: pack the fec_header structure ANDROID: dm: android-verity: Verify header before fetching table ANDROID: dm: allow adb disable-verity only in userdebug ANDROID: dm: mount as linear target if eng build ANDROID: dm: use default verity public key ANDROID: dm: fix signature verification flag ANDROID: dm: use name_to_dev_t ANDROID: dm: rename dm-linear methods for dm-android-verity ANDROID: dm: Minor cleanup ANDROID: dm: Mounting root as linear device when verity disabled ANDROID: dm-android-verity: Rebase on top of 4.1 ANDROID: dm: Add android verity target ANDROID: dm: fix dm_substitute_devices() ANDROID: dm: Rebase on top of 4.1 CHROMIUM: dm: boot time specification of dm= Implement memory_state_time, used by qcom,cpubw Revert "panic: Add board ID to panic output" usb: gadget: f_accessory: remove duplicate endpoint alloc BACKPORT: brcmfmac: defer DPC processing during probe FROMLIST: proc: Add LSM hook checks to /proc/<tid>/timerslack_ns FROMLIST: proc: Relax /proc/<tid>/timerslack_ns capability requirements UPSTREAM: ppp: defer netns reference release for ppp channel cpuset: Add allow_attach hook for cpusets on android. UPSTREAM: KEYS: Fix ASN.1 indefinite length object parsing ANDROID: sdcardfs: fix itnull.cocci warnings android-recommended.cfg: enable fstack-protector-strong Linux 4.4.18 mm: memcontrol: fix memcg id ref counter on swap charge move mm: memcontrol: fix swap counter leak on swapout from offline cgroup mm: memcontrol: fix cgroup creation failure after many small jobs ext4: fix reference counting bug on block allocation error ext4: short-cut orphan cleanup on error ext4: validate s_reserved_gdt_blocks on mount ext4: don't call ext4_should_journal_data() on the journal inode ext4: fix deadlock during page writeback ext4: check for extents that wrap around crypto: scatterwalk - Fix test in scatterwalk_done crypto: gcm - Filter out async ghash if necessary fs/dcache.c: avoid soft-lockup in dput() fuse: fix wrong assignment of ->flags in fuse_send_init() fuse: fuse_flush must check mapping->flags for errors fuse: fsync() did not return IO errors sysv, ipc: fix security-layer leaking block: fix use-after-free in seq file x86/syscalls/64: Add compat_sys_keyctl for 32-bit userspace drm/i915: Pretend cursor is always on for ILK-style WM calculations (v2) x86/mm/pat: Fix BUG_ON() in mmap_mem() on QEMU/i386 x86/pat: Document the PAT initialization sequence x86/xen, pat: Remove PAT table init code from Xen x86/mtrr: Fix PAT init handling when MTRR is disabled x86/mtrr: Fix Xorg crashes in Qemu sessions x86/mm/pat: Replace cpu_has_pat with boot_cpu_has() x86/mm/pat: Add pat_disable() interface x86/mm/pat: Add support of non-default PAT MSR setting devpts: clean up interface to pty drivers random: strengthen input validation for RNDADDTOENTCNT apparmor: fix ref count leak when profile sha1 hash is read Revert "s390/kdump: Clear subchannel ID to signal non-CCW/SCSI IPL" KEYS: 64-bit MIPS needs to use compat_sys_keyctl for 32-bit userspace arm: oabi compat: add missing access checks cdc_ncm: do not call usbnet_link_change from cdc_ncm_bind i2c: i801: Allow ACPI SystemIO OpRegion to conflict with PCI BAR x86/mm/32: Enable full randomization on i386 and X86_32 HID: sony: do not bail out when the sixaxis refuses the output report PNP: Add Broadwell to Intel MCH size workaround PNP: Add Haswell-ULT to Intel MCH size workaround scsi: ignore errors from scsi_dh_add_device() ipath: Restrict use of the write() interface tcp: consider recv buf for the initial window scale qed: Fix setting/clearing bit in completion bitmap net/irda: fix NULL pointer dereference on memory allocation failure net: bgmac: Fix infinite loop in bgmac_dma_tx_add() bonding: set carrier off for devices created through netlink ipv4: reject RTNH_F_DEAD and RTNH_F_LINKDOWN from user space tcp: enable per-socket rate limiting of all 'challenge acks' tcp: make challenge acks less predictable arm64: relocatable: suppress R_AARCH64_ABS64 relocations in vmlinux arm64: vmlinux.lds: make __rela_offset and __dynsym_offset ABSOLUTE Linux 4.4.17 vfs: fix deadlock in file_remove_privs() on overlayfs intel_th: Fix a deadlock in modprobing intel_th: pci: Add Kaby Lake PCH-H support net: mvneta: set real interrupt per packet for tx_done libceph: apply new_state before new_up_client on incrementals libata: LITE-ON CX1-JB256-HP needs lower max_sectors i2c: mux: reg: wrong condition checked for of_address_to_resource return value posix_cpu_timer: Exit early when process has been reaped media: fix airspy usb probe error path ipr: Clear interrupt on croc/crocodile when running with LSI SCSI: fix new bug in scsi_dev_info_list string matching RDS: fix rds_tcp_init() error path can: fix oops caused by wrong rtnl dellink usage can: fix handling of unmodifiable configuration options fix can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera Cyclone access can: at91_can: RX queue could get stuck at high bus load perf/x86: fix PEBS issues on Intel Atom/Core2 ovl: handle ATTR_KILL* sched/fair: Fix effective_load() to consistently use smoothed load mmc: block: fix packed command header endianness block: fix use-after-free in sys_ioprio_get() qeth: delete napi struct when removing a qeth device platform/chrome: cros_ec_dev - double fetch bug in ioctl clk: rockchip: initialize flags of clk_init_data in mmc-phase clock spi: sun4i: fix FIFO limit spi: sunxi: fix transfer timeout namespace: update event counter when umounting a deleted dentry 9p: use file_dentry() ext4: verify extent header depth ecryptfs: don't allow mmap when the lower fs doesn't support it Revert "ecryptfs: forbid opening files without mmap handler" locks: use file_inode() power_supply: power_supply_read_temp only if use_cnt > 0 cgroup: set css->id to -1 during init pinctrl: imx: Do not treat a PIN without MUX register as an error pinctrl: single: Fix missing flush of posted write for a wakeirq pvclock: Add CPU barriers to get correct version value Input: tsc200x - report proper input_dev name Input: xpad - validate USB endpoint count during probe Input: wacom_w8001 - w8001_MAX_LENGTH should be 13 Input: xpad - fix oops when attaching an unknown Xbox One gamepad Input: elantech - add more IC body types to the list Input: vmmouse - remove port reservation ALSA: timer: Fix leak in events via snd_timer_user_tinterrupt ALSA: timer: Fix leak in events via snd_timer_user_ccallback ALSA: timer: Fix leak in SNDRV_TIMER_IOCTL_PARAMS xenbus: don't bail early from xenbus_dev_request_and_reply() xenbus: don't BUG() on user mode induced condition xen/pciback: Fix conf_space read/write overlap check. ARC: unwind: ensure that .debug_frame is generated (vs. .eh_frame) arc: unwind: warn only once if DW2_UNWIND is disabled kernel/sysrq, watchdog, sched/core: Reset watchdog on all CPUs while processing sysrq-w pps: do not crash when failed to register vmlinux.lds: account for destructor sections mm, meminit: ensure node is online before checking whether pages are uninitialised mm, meminit: always return a valid node from early_pfn_to_nid mm, compaction: prevent VM_BUG_ON when terminating freeing scanner fs/nilfs2: fix potential underflow in call to crc32_le mm, compaction: abort free scanner if split fails mm, sl[au]b: add __GFP_ATOMIC to the GFP reclaim mask dmaengine: at_xdmac: double FIFO flush needed to compute residue dmaengine: at_xdmac: fix residue corruption dmaengine: at_xdmac: align descriptors on 64 bits x86/quirks: Add early quirk to reset Apple AirPort card x86/quirks: Reintroduce scanning of secondary buses x86/quirks: Apply nvidia_bugs quirk only on root bus USB: OHCI: Don't mark EDs as ED_OPER if scheduling fails Conflicts: arch/arm/kernel/topology.c arch/arm64/include/asm/arch_gicv3.h arch/arm64/kernel/topology.c block/bio.c drivers/cpufreq/Kconfig drivers/md/Makefile drivers/media/dvb-core/dvb_ringbuffer.c drivers/media/tuners/tuner-xc2028.c drivers/misc/Kconfig drivers/misc/Makefile drivers/mmc/core/host.c drivers/scsi/ufs/ufshcd.c drivers/scsi/ufs/ufshcd.h drivers/usb/dwc3/gadget.c drivers/usb/gadget/configfs.c fs/ecryptfs/file.c include/linux/mmc/core.h include/linux/mmc/host.h include/linux/mmzone.h include/linux/sched.h include/linux/sched/sysctl.h include/trace/events/power.h include/trace/events/sched.h init/Kconfig kernel/cpuset.c kernel/exit.c kernel/sched/Makefile kernel/sched/core.c kernel/sched/cputime.c kernel/sched/fair.c kernel/sched/features.h kernel/sched/rt.c kernel/sched/sched.h kernel/sched/stop_task.c kernel/sched/tune.c lib/Kconfig.debug mm/Makefile mm/vmstat.c Change-Id: I243a43231ca56a6362076fa6301827e1b0493be5 Signed-off-by: Runmin Wang <runminw@codeaurora.org>
Diffstat (limited to 'kernel/sched/tune.c')
-rw-r--r--kernel/sched/tune.c756
1 files changed, 734 insertions, 22 deletions
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index ee2af8e0b5ce..3c2e21c5a5a0 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -1,13 +1,108 @@
#include <linux/cgroup.h>
#include <linux/err.h>
+#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/printk.h>
+#include <linux/rcupdate.h>
#include <linux/slab.h>
+#include <trace/events/sched.h>
+
#include "sched.h"
+#include "tune.h"
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+static bool schedtune_initialized = false;
+#endif
unsigned int sysctl_sched_cfs_boost __read_mostly;
+extern struct target_nrg schedtune_target_nrg;
+
+/* Performance Boost region (B) threshold params */
+static int perf_boost_idx;
+
+/* Performance Constraint region (C) threshold params */
+static int perf_constrain_idx;
+
+/**
+ * Performance-Energy (P-E) Space thresholds constants
+ */
+struct threshold_params {
+ int nrg_gain;
+ int cap_gain;
+};
+
+/*
+ * System specific P-E space thresholds constants
+ */
+static struct threshold_params
+threshold_gains[] = {
+ { 0, 5 }, /* < 10% */
+ { 1, 5 }, /* < 20% */
+ { 2, 5 }, /* < 30% */
+ { 3, 5 }, /* < 40% */
+ { 4, 5 }, /* < 50% */
+ { 5, 4 }, /* < 60% */
+ { 5, 3 }, /* < 70% */
+ { 5, 2 }, /* < 80% */
+ { 5, 1 }, /* < 90% */
+ { 5, 0 } /* <= 100% */
+};
+
+static int
+__schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ int perf_boost_idx, int perf_constrain_idx)
+{
+ int payoff = -INT_MAX;
+ int gain_idx = -1;
+
+ /* Performance Boost (B) region */
+ if (nrg_delta >= 0 && cap_delta > 0)
+ gain_idx = perf_boost_idx;
+ /* Performance Constraint (C) region */
+ else if (nrg_delta < 0 && cap_delta <= 0)
+ gain_idx = perf_constrain_idx;
+
+ /* Default: reject schedule candidate */
+ if (gain_idx == -1)
+ return payoff;
+
+ /*
+ * Evaluate "Performance Boost" vs "Energy Increase"
+ *
+ * - Performance Boost (B) region
+ *
+ * Condition: nrg_delta > 0 && cap_delta > 0
+ * Payoff criteria:
+ * cap_gain / nrg_gain < cap_delta / nrg_delta =
+ * cap_gain * nrg_delta < cap_delta * nrg_gain
+ * Note that since both nrg_gain and nrg_delta are positive, the
+ * inequality does not change. Thus:
+ *
+ * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
+ *
+ * - Performance Constraint (C) region
+ *
+ * Condition: nrg_delta < 0 && cap_delta < 0
+ * payoff criteria:
+ * cap_gain / nrg_gain > cap_delta / nrg_delta =
+ * cap_gain * nrg_delta < cap_delta * nrg_gain
+ * Note that since nrg_gain > 0 while nrg_delta < 0, the
+ * inequality change. Thus:
+ *
+ * payoff = (cap_delta * nrg_gain) - (cap_gain * nrg_delta)
+ *
+ * This means that, in case of same positive defined {cap,nrg}_gain
+ * for both the B and C regions, we can use the same payoff formula
+ * where a positive value represents the accept condition.
+ */
+ payoff = cap_delta * threshold_gains[gain_idx].nrg_gain;
+ payoff -= nrg_delta * threshold_gains[gain_idx].cap_gain;
+
+ return payoff;
+}
+
#ifdef CONFIG_CGROUP_SCHEDTUNE
/*
@@ -52,6 +147,15 @@ struct schedtune {
bool colocate_update_disabled;
#endif
+ /* Performance Boost (B) region threshold params */
+ int perf_boost_idx;
+
+ /* Performance Constraint (C) region threshold params */
+ int perf_constrain_idx;
+
+ /* Hint to bias scheduling of tasks on that SchedTune CGroup
+ * towards idle CPUs */
+ int prefer_idle;
};
static inline struct schedtune *css_st(struct cgroup_subsys_state *css)
@@ -88,8 +192,42 @@ root_schedtune = {
.colocate = false,
.colocate_update_disabled = false,
#endif
+ .perf_boost_idx = 0,
+ .perf_constrain_idx = 0,
+ .prefer_idle = 0,
};
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ struct task_struct *task)
+{
+ struct schedtune *ct;
+ int perf_boost_idx;
+ int perf_constrain_idx;
+
+ /* Optimal (O) region */
+ if (nrg_delta < 0 && cap_delta > 0) {
+ trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
+ return INT_MAX;
+ }
+
+ /* Suboptimal (S) region */
+ if (nrg_delta > 0 && cap_delta < 0) {
+ trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
+ return -INT_MAX;
+ }
+
+ /* Get task specific perf Boost/Constraints indexes */
+ rcu_read_lock();
+ ct = task_schedtune(task);
+ perf_boost_idx = ct->perf_boost_idx;
+ perf_constrain_idx = ct->perf_constrain_idx;
+ rcu_read_unlock();
+
+ return __schedtune_accept_deltas(nrg_delta, cap_delta,
+ perf_boost_idx, perf_constrain_idx);
+}
+
/*
* Maximum number of boost groups to support
* When per-task boosting is used we still allow only limited number of
@@ -119,13 +257,16 @@ static struct schedtune *allocated_group[BOOSTGROUPS_COUNT] = {
*/
struct boost_groups {
/* Maximum boost value for all RUNNABLE tasks on a CPU */
- unsigned boost_max;
+ bool idle;
+ int boost_max;
struct {
/* The boost for tasks on that boost group */
- unsigned boost;
+ int boost;
/* Count of RUNNABLE tasks on that boost group */
unsigned tasks;
} group[BOOSTGROUPS_COUNT];
+ /* CPU's boost group locking */
+ raw_spinlock_t lock;
};
/* Boost groups affecting each CPU in the system */
@@ -246,7 +387,342 @@ static inline void init_sched_boost(struct schedtune *st) { }
#endif /* CONFIG_SCHED_HMP */
+static void
+schedtune_cpu_update(int cpu)
+{
+ struct boost_groups *bg;
+ int boost_max;
+ int idx;
+
+ bg = &per_cpu(cpu_boost_groups, cpu);
+
+ /* The root boost group is always active */
+ boost_max = bg->group[0].boost;
+ for (idx = 1; idx < BOOSTGROUPS_COUNT; ++idx) {
+ /*
+ * A boost group affects a CPU only if it has
+ * RUNNABLE tasks on that CPU
+ */
+ if (bg->group[idx].tasks == 0)
+ continue;
+
+ boost_max = max(boost_max, bg->group[idx].boost);
+ }
+ /* Ensures boost_max is non-negative when all cgroup boost values
+ * are neagtive. Avoids under-accounting of cpu capacity which may cause
+ * task stacking and frequency spikes.*/
+ boost_max = max(boost_max, 0);
+ bg->boost_max = boost_max;
+}
+
+static int
+schedtune_boostgroup_update(int idx, int boost)
+{
+ struct boost_groups *bg;
+ int cur_boost_max;
+ int old_boost;
+ int cpu;
+
+ /* Update per CPU boost groups */
+ for_each_possible_cpu(cpu) {
+ bg = &per_cpu(cpu_boost_groups, cpu);
+
+ /*
+ * Keep track of current boost values to compute the per CPU
+ * maximum only when it has been affected by the new value of
+ * the updated boost group
+ */
+ cur_boost_max = bg->boost_max;
+ old_boost = bg->group[idx].boost;
+
+ /* Update the boost value of this boost group */
+ bg->group[idx].boost = boost;
+
+ /* Check if this update increase current max */
+ if (boost > cur_boost_max && bg->group[idx].tasks) {
+ bg->boost_max = boost;
+ trace_sched_tune_boostgroup_update(cpu, 1, bg->boost_max);
+ continue;
+ }
+
+ /* Check if this update has decreased current max */
+ if (cur_boost_max == old_boost && old_boost > boost) {
+ schedtune_cpu_update(cpu);
+ trace_sched_tune_boostgroup_update(cpu, -1, bg->boost_max);
+ continue;
+ }
+
+ trace_sched_tune_boostgroup_update(cpu, 0, bg->boost_max);
+ }
+
+ return 0;
+}
+
+#define ENQUEUE_TASK 1
+#define DEQUEUE_TASK -1
+
+static inline void
+schedtune_tasks_update(struct task_struct *p, int cpu, int idx, int task_count)
+{
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ int tasks = bg->group[idx].tasks + task_count;
+
+ /* Update boosted tasks count while avoiding to make it negative */
+ bg->group[idx].tasks = max(0, tasks);
+
+ trace_sched_tune_tasks_update(p, cpu, tasks, idx,
+ bg->group[idx].boost, bg->boost_max);
+
+ /* Boost group activation or deactivation on that RQ */
+ if (tasks == 1 || tasks == 0)
+ schedtune_cpu_update(cpu);
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_enqueue_task(struct task_struct *p, int cpu)
+{
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ unsigned long irq_flags;
+ struct schedtune *st;
+ int idx;
+
+ if (!unlikely(schedtune_initialized))
+ return;
+
+ /*
+ * When a task is marked PF_EXITING by do_exit() it's going to be
+ * dequeued and enqueued multiple times in the exit path.
+ * Thus we avoid any further update, since we do not want to change
+ * CPU boosting while the task is exiting.
+ */
+ if (p->flags & PF_EXITING)
+ return;
+
+ /*
+ * Boost group accouting is protected by a per-cpu lock and requires
+ * interrupt to be disabled to avoid race conditions for example on
+ * do_exit()::cgroup_exit() and task migration.
+ */
+ raw_spin_lock_irqsave(&bg->lock, irq_flags);
+ rcu_read_lock();
+
+ st = task_schedtune(p);
+ idx = st->idx;
+
+ schedtune_tasks_update(p, cpu, idx, ENQUEUE_TASK);
+
+ rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+int schedtune_allow_attach(struct cgroup_taskset *tset)
+{
+ /* We always allows tasks to be moved between existing CGroups */
+ return 0;
+}
+
+int schedtune_can_attach(struct cgroup_taskset *tset)
+{
+ struct task_struct *task;
+ struct cgroup_subsys_state *css;
+ struct boost_groups *bg;
+ unsigned long irq_flags;
+ unsigned int cpu;
+ struct rq *rq;
+ int src_bg; /* Source boost group index */
+ int dst_bg; /* Destination boost group index */
+ int tasks;
+
+ if (!unlikely(schedtune_initialized))
+ return 0;
+
+
+ cgroup_taskset_for_each(task, css, tset) {
+
+ /*
+ * Lock the CPU's RQ the task is enqueued to avoid race
+ * conditions with migration code while the task is being
+ * accounted
+ */
+ rq = lock_rq_of(task, &irq_flags);
+
+ if (!task->on_rq) {
+ unlock_rq_of(rq, task, &irq_flags);
+ continue;
+ }
+
+ /*
+ * Boost group accouting is protected by a per-cpu lock and requires
+ * interrupt to be disabled to avoid race conditions on...
+ */
+ cpu = cpu_of(rq);
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ raw_spin_lock(&bg->lock);
+
+ dst_bg = css_st(css)->idx;
+ src_bg = task_schedtune(task)->idx;
+
+ /*
+ * Current task is not changing boostgroup, which can
+ * happen when the new hierarchy is in use.
+ */
+ if (unlikely(dst_bg == src_bg)) {
+ raw_spin_unlock(&bg->lock);
+ unlock_rq_of(rq, task, &irq_flags);
+ continue;
+ }
+
+ /*
+ * This is the case of a RUNNABLE task which is switching its
+ * current boost group.
+ */
+
+ /* Move task from src to dst boost group */
+ tasks = bg->group[src_bg].tasks - 1;
+ bg->group[src_bg].tasks = max(0, tasks);
+ bg->group[dst_bg].tasks += 1;
+
+ raw_spin_unlock(&bg->lock);
+ unlock_rq_of(rq, task, &irq_flags);
+
+ /* Update CPU boost group */
+ if (bg->group[src_bg].tasks == 0 || bg->group[dst_bg].tasks == 1)
+ schedtune_cpu_update(task_cpu(task));
+
+ }
+
+ return 0;
+}
+
+void schedtune_cancel_attach(struct cgroup_taskset *tset)
+{
+ /* This can happen only if SchedTune controller is mounted with
+ * other hierarchies ane one of them fails. Since usually SchedTune is
+ * mouted on its own hierarcy, for the time being we do not implement
+ * a proper rollback mechanism */
+ WARN(1, "SchedTune cancel attach not implemented");
+}
+
+/*
+ * NOTE: This function must be called while holding the lock on the CPU RQ
+ */
+void schedtune_dequeue_task(struct task_struct *p, int cpu)
+{
+ struct boost_groups *bg = &per_cpu(cpu_boost_groups, cpu);
+ unsigned long irq_flags;
+ struct schedtune *st;
+ int idx;
+
+ if (!unlikely(schedtune_initialized))
+ return;
+
+ /*
+ * When a task is marked PF_EXITING by do_exit() it's going to be
+ * dequeued and enqueued multiple times in the exit path.
+ * Thus we avoid any further update, since we do not want to change
+ * CPU boosting while the task is exiting.
+ * The last dequeue is already enforce by the do_exit() code path
+ * via schedtune_exit_task().
+ */
+ if (p->flags & PF_EXITING)
+ return;
+
+ /*
+ * Boost group accouting is protected by a per-cpu lock and requires
+ * interrupt to be disabled to avoid race conditions on...
+ */
+ raw_spin_lock_irqsave(&bg->lock, irq_flags);
+ rcu_read_lock();
+
+ st = task_schedtune(p);
+ idx = st->idx;
+
+ schedtune_tasks_update(p, cpu, idx, DEQUEUE_TASK);
+
+ rcu_read_unlock();
+ raw_spin_unlock_irqrestore(&bg->lock, irq_flags);
+}
+
+void schedtune_exit_task(struct task_struct *tsk)
+{
+ struct schedtune *st;
+ unsigned long irq_flags;
+ unsigned int cpu;
+ struct rq *rq;
+ int idx;
+
+ if (!unlikely(schedtune_initialized))
+ return;
+
+ rq = lock_rq_of(tsk, &irq_flags);
+ rcu_read_lock();
+
+ cpu = cpu_of(rq);
+ st = task_schedtune(tsk);
+ idx = st->idx;
+ schedtune_tasks_update(tsk, cpu, idx, DEQUEUE_TASK);
+
+ rcu_read_unlock();
+ unlock_rq_of(rq, tsk, &irq_flags);
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+ struct boost_groups *bg;
+
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ return bg->boost_max;
+}
+
+int schedtune_task_boost(struct task_struct *p)
+{
+ struct schedtune *st;
+ int task_boost;
+
+ /* Get task boost value */
+ rcu_read_lock();
+ st = task_schedtune(p);
+ task_boost = st->boost;
+ rcu_read_unlock();
+
+ return task_boost;
+}
+
+int schedtune_prefer_idle(struct task_struct *p)
+{
+ struct schedtune *st;
+ int prefer_idle;
+
+ /* Get prefer_idle value */
+ rcu_read_lock();
+ st = task_schedtune(p);
+ prefer_idle = st->prefer_idle;
+ rcu_read_unlock();
+
+ return prefer_idle;
+}
+
static u64
+prefer_idle_read(struct cgroup_subsys_state *css, struct cftype *cft)
+{
+ struct schedtune *st = css_st(css);
+
+ return st->prefer_idle;
+}
+
+static int
+prefer_idle_write(struct cgroup_subsys_state *css, struct cftype *cft,
+ u64 prefer_idle)
+{
+ struct schedtune *st = css_st(css);
+ st->prefer_idle = prefer_idle;
+
+ return 0;
+}
+
+static s64
boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct schedtune *st = css_st(css);
@@ -256,16 +732,37 @@ boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
static int
boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
- u64 boost)
+ s64 boost)
{
struct schedtune *st = css_st(css);
+ unsigned threshold_idx;
+ int boost_pct;
- if (boost < 0 || boost > 100)
+ if (boost < -100 || boost > 100)
return -EINVAL;
+ boost_pct = boost;
+
+ /*
+ * Update threshold params for Performance Boost (B)
+ * and Performance Constraint (C) regions.
+ * The current implementatio uses the same cuts for both
+ * B and C regions.
+ */
+ threshold_idx = clamp(boost_pct, 0, 99) / 10;
+ st->perf_boost_idx = threshold_idx;
+ st->perf_constrain_idx = threshold_idx;
st->boost = boost;
- if (css == &root_schedtune.css)
+ if (css == &root_schedtune.css) {
sysctl_sched_cfs_boost = boost;
+ perf_boost_idx = threshold_idx;
+ perf_constrain_idx = threshold_idx;
+ }
+
+ /* Update CPU boost */
+ schedtune_boostgroup_update(st->idx, st->boost);
+
+ trace_sched_tune_config(st->boost);
return 0;
}
@@ -289,8 +786,13 @@ static void schedtune_attach(struct cgroup_taskset *tset)
static struct cftype files[] = {
{
.name = "boost",
- .read_u64 = boost_read,
- .write_u64 = boost_write,
+ .read_s64 = boost_read,
+ .write_s64 = boost_write,
+ },
+ {
+ .name = "prefer_idle",
+ .read_u64 = prefer_idle_read,
+ .write_u64 = prefer_idle_write,
},
#ifdef CONFIG_SCHED_HMP
{
@@ -315,26 +817,19 @@ static struct cftype files[] = {
static int
schedtune_boostgroup_init(struct schedtune *st)
{
- /* Keep track of allocated boost groups */
- allocated_group[st->idx] = st;
-
- return 0;
-}
-
-static int
-schedtune_init(void)
-{
struct boost_groups *bg;
int cpu;
+ /* Keep track of allocated boost groups */
+ allocated_group[st->idx] = st;
+
/* Initialize the per CPU boost groups */
for_each_possible_cpu(cpu) {
bg = &per_cpu(cpu_boost_groups, cpu);
- memset(bg, 0, sizeof(struct boost_groups));
+ bg->group[st->idx].boost = 0;
+ bg->group[st->idx].tasks = 0;
}
- pr_info(" schedtune configured to support %d boost groups\n",
- BOOSTGROUPS_COUNT);
return 0;
}
@@ -344,10 +839,8 @@ schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
struct schedtune *st;
int idx;
- if (!parent_css) {
- schedtune_init();
+ if (!parent_css)
return &root_schedtune.css;
- }
/* Allow only single level hierachies */
if (parent_css != &root_schedtune.css) {
@@ -386,6 +879,9 @@ out:
static void
schedtune_boostgroup_release(struct schedtune *st)
{
+ /* Reset this boost group */
+ schedtune_boostgroup_update(st->idx, 0);
+
/* Keep track of allocated boost groups */
allocated_group[st->idx] = NULL;
}
@@ -402,12 +898,55 @@ schedtune_css_free(struct cgroup_subsys_state *css)
struct cgroup_subsys schedtune_cgrp_subsys = {
.css_alloc = schedtune_css_alloc,
.css_free = schedtune_css_free,
+ .allow_attach = schedtune_allow_attach,
+ .can_attach = schedtune_can_attach,
+ .cancel_attach = schedtune_cancel_attach,
.legacy_cftypes = files,
.early_init = 1,
.allow_attach = subsys_cgroup_allow_attach,
.attach = schedtune_attach,
};
+static inline void
+schedtune_init_cgroups(void)
+{
+ struct boost_groups *bg;
+ int cpu;
+
+ /* Initialize the per CPU boost groups */
+ for_each_possible_cpu(cpu) {
+ bg = &per_cpu(cpu_boost_groups, cpu);
+ memset(bg, 0, sizeof(struct boost_groups));
+ }
+
+ pr_info("schedtune: configured to support %d boost groups\n",
+ BOOSTGROUPS_COUNT);
+
+ schedtune_initialized = true;
+}
+
+#else /* CONFIG_CGROUP_SCHEDTUNE */
+
+int
+schedtune_accept_deltas(int nrg_delta, int cap_delta,
+ struct task_struct *task)
+{
+ /* Optimal (O) region */
+ if (nrg_delta < 0 && cap_delta > 0) {
+ trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, 1, 0);
+ return INT_MAX;
+ }
+
+ /* Suboptimal (S) region */
+ if (nrg_delta > 0 && cap_delta < 0) {
+ trace_sched_tune_filter(nrg_delta, cap_delta, 0, 0, -1, 5);
+ return -INT_MAX;
+ }
+
+ return __schedtune_accept_deltas(nrg_delta, cap_delta,
+ perf_boost_idx, perf_constrain_idx);
+}
+
#endif /* CONFIG_CGROUP_SCHEDTUNE */
int
@@ -416,10 +955,183 @@ sysctl_sched_cfs_boost_handler(struct ctl_table *table, int write,
loff_t *ppos)
{
int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ unsigned threshold_idx;
+ int boost_pct;
if (ret || !write)
return ret;
+ if (sysctl_sched_cfs_boost < -100 || sysctl_sched_cfs_boost > 100)
+ return -EINVAL;
+ boost_pct = sysctl_sched_cfs_boost;
+
+ /*
+ * Update threshold params for Performance Boost (B)
+ * and Performance Constraint (C) regions.
+ * The current implementatio uses the same cuts for both
+ * B and C regions.
+ */
+ threshold_idx = clamp(boost_pct, 0, 99) / 10;
+ perf_boost_idx = threshold_idx;
+ perf_constrain_idx = threshold_idx;
+
return 0;
}
+#ifdef CONFIG_SCHED_DEBUG
+static void
+schedtune_test_nrg(unsigned long delta_pwr)
+{
+ unsigned long test_delta_pwr;
+ unsigned long test_norm_pwr;
+ int idx;
+
+ /*
+ * Check normalization constants using some constant system
+ * energy values
+ */
+ pr_info("schedtune: verify normalization constants...\n");
+ for (idx = 0; idx < 6; ++idx) {
+ test_delta_pwr = delta_pwr >> idx;
+
+ /* Normalize on max energy for target platform */
+ test_norm_pwr = reciprocal_divide(
+ test_delta_pwr << SCHED_LOAD_SHIFT,
+ schedtune_target_nrg.rdiv);
+
+ pr_info("schedtune: max_pwr/2^%d: %4lu => norm_pwr: %5lu\n",
+ idx, test_delta_pwr, test_norm_pwr);
+ }
+}
+#else
+#define schedtune_test_nrg(delta_pwr)
+#endif
+
+/*
+ * Compute the min/max power consumption of a cluster and all its CPUs
+ */
+static void
+schedtune_add_cluster_nrg(
+ struct sched_domain *sd,
+ struct sched_group *sg,
+ struct target_nrg *ste)
+{
+ struct sched_domain *sd2;
+ struct sched_group *sg2;
+
+ struct cpumask *cluster_cpus;
+ char str[32];
+
+ unsigned long min_pwr;
+ unsigned long max_pwr;
+ int cpu;
+
+ /* Get Cluster energy using EM data for the first CPU */
+ cluster_cpus = sched_group_cpus(sg);
+ snprintf(str, 32, "CLUSTER[%*pbl]",
+ cpumask_pr_args(cluster_cpus));
+
+ min_pwr = sg->sge->idle_states[sg->sge->nr_idle_states - 1].power;
+ max_pwr = sg->sge->cap_states[sg->sge->nr_cap_states - 1].power;
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ str, min_pwr, max_pwr);
+
+ /*
+ * Keep track of this cluster's energy in the computation of the
+ * overall system energy
+ */
+ ste->min_power += min_pwr;
+ ste->max_power += max_pwr;
+
+ /* Get CPU energy using EM data for each CPU in the group */
+ for_each_cpu(cpu, cluster_cpus) {
+ /* Get a SD view for the specific CPU */
+ for_each_domain(cpu, sd2) {
+ /* Get the CPU group */
+ sg2 = sd2->groups;
+ min_pwr = sg2->sge->idle_states[sg2->sge->nr_idle_states - 1].power;
+ max_pwr = sg2->sge->cap_states[sg2->sge->nr_cap_states - 1].power;
+
+ ste->min_power += min_pwr;
+ ste->max_power += max_pwr;
+
+ snprintf(str, 32, "CPU[%d]", cpu);
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ str, min_pwr, max_pwr);
+
+ /*
+ * Assume we have EM data only at the CPU and
+ * the upper CLUSTER level
+ */
+ BUG_ON(!cpumask_equal(
+ sched_group_cpus(sg),
+ sched_group_cpus(sd2->parent->groups)
+ ));
+ break;
+ }
+ }
+}
+
+/*
+ * Initialize the constants required to compute normalized energy.
+ * The values of these constants depends on the EM data for the specific
+ * target system and topology.
+ * Thus, this function is expected to be called by the code
+ * that bind the EM to the topology information.
+ */
+static int
+schedtune_init(void)
+{
+ struct target_nrg *ste = &schedtune_target_nrg;
+ unsigned long delta_pwr = 0;
+ struct sched_domain *sd;
+ struct sched_group *sg;
+
+ pr_info("schedtune: init normalization constants...\n");
+ ste->max_power = 0;
+ ste->min_power = 0;
+
+ rcu_read_lock();
+
+ /*
+ * When EAS is in use, we always have a pointer to the highest SD
+ * which provides EM data.
+ */
+ sd = rcu_dereference(per_cpu(sd_ea, cpumask_first(cpu_online_mask)));
+ if (!sd) {
+ if (energy_aware())
+ pr_warn("schedtune: no energy model data\n");
+ goto nodata;
+ }
+
+ sg = sd->groups;
+ do {
+ schedtune_add_cluster_nrg(sd, sg, ste);
+ } while (sg = sg->next, sg != sd->groups);
+
+ rcu_read_unlock();
+
+ pr_info("schedtune: %-17s min_pwr: %5lu max_pwr: %5lu\n",
+ "SYSTEM", ste->min_power, ste->max_power);
+
+ /* Compute normalization constants */
+ delta_pwr = ste->max_power - ste->min_power;
+ ste->rdiv = reciprocal_value(delta_pwr);
+ pr_info("schedtune: using normalization constants mul: %u sh1: %u sh2: %u\n",
+ ste->rdiv.m, ste->rdiv.sh1, ste->rdiv.sh2);
+
+ schedtune_test_nrg(delta_pwr);
+
+#ifdef CONFIG_CGROUP_SCHEDTUNE
+ schedtune_init_cgroups();
+#else
+ pr_info("schedtune: configured to support global boosting only\n");
+#endif
+
+ return 0;
+
+nodata:
+ rcu_read_unlock();
+ return -EINVAL;
+}
+postcore_initcall(schedtune_init);