213 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			213 lines
		
	
	
		
			5.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
 | |
|  *
 | |
|  * (C) Copyright 2016 Intel Corporation
 | |
|  * Author: Tim Chen <tim.c.chen@linux.intel.com>
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License
 | |
|  * as published by the Free Software Foundation; version 2
 | |
|  * of the License.
 | |
|  *
 | |
|  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
 | |
|  * the maximum turbo frequencies of some cores in a CPU package may be
 | |
|  * higher than for the other cores in the same package.  In that case,
 | |
|  * better performance can be achieved by making the scheduler prefer
 | |
|  * to run tasks on the CPUs with higher max turbo frequencies.
 | |
|  *
 | |
|  * This file provides functions and data structures for enabling the
 | |
|  * scheduler to favor scheduling on cores that can be boosted to a higher
 | |
|  * frequency under ITMT.
 | |
|  */
 | |
| 
 | |
| #include <linux/sched.h>
 | |
| #include <linux/cpumask.h>
 | |
| #include <linux/cpuset.h>
 | |
| #include <linux/mutex.h>
 | |
| #include <linux/sysctl.h>
 | |
| #include <linux/nodemask.h>
 | |
| 
 | |
| static DEFINE_MUTEX(itmt_update_mutex);
 | |
| DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
 | |
| 
 | |
| /* Boolean to track if system has ITMT capabilities */
 | |
| static bool __read_mostly sched_itmt_capable;
 | |
| 
 | |
| /*
 | |
|  * Boolean to control whether we want to move processes to cpu capable
 | |
|  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
 | |
|  * Technology 3.0.
 | |
|  *
 | |
|  * It can be set via /proc/sys/kernel/sched_itmt_enabled
 | |
|  */
 | |
| unsigned int __read_mostly sysctl_sched_itmt_enabled;
 | |
| 
 | |
| static int sched_itmt_update_handler(struct ctl_table *table, int write,
 | |
| 				     void __user *buffer, size_t *lenp,
 | |
| 				     loff_t *ppos)
 | |
| {
 | |
| 	unsigned int old_sysctl;
 | |
| 	int ret;
 | |
| 
 | |
| 	mutex_lock(&itmt_update_mutex);
 | |
| 
 | |
| 	if (!sched_itmt_capable) {
 | |
| 		mutex_unlock(&itmt_update_mutex);
 | |
| 		return -EINVAL;
 | |
| 	}
 | |
| 
 | |
| 	old_sysctl = sysctl_sched_itmt_enabled;
 | |
| 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 | |
| 
 | |
| 	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
 | |
| 		x86_topology_update = true;
 | |
| 		rebuild_sched_domains();
 | |
| 	}
 | |
| 
 | |
| 	mutex_unlock(&itmt_update_mutex);
 | |
| 
 | |
| 	return ret;
 | |
| }
 | |
| 
 | |
| static unsigned int zero;
 | |
| static unsigned int one = 1;
 | |
| static struct ctl_table itmt_kern_table[] = {
 | |
| 	{
 | |
| 		.procname	= "sched_itmt_enabled",
 | |
| 		.data		= &sysctl_sched_itmt_enabled,
 | |
| 		.maxlen		= sizeof(unsigned int),
 | |
| 		.mode		= 0644,
 | |
| 		.proc_handler	= sched_itmt_update_handler,
 | |
| 		.extra1		= &zero,
 | |
| 		.extra2		= &one,
 | |
| 	},
 | |
| 	{}
 | |
| };
 | |
| 
 | |
| static struct ctl_table itmt_root_table[] = {
 | |
| 	{
 | |
| 		.procname	= "kernel",
 | |
| 		.mode		= 0555,
 | |
| 		.child		= itmt_kern_table,
 | |
| 	},
 | |
| 	{}
 | |
| };
 | |
| 
 | |
| static struct ctl_table_header *itmt_sysctl_header;
 | |
| 
 | |
| /**
 | |
|  * sched_set_itmt_support() - Indicate platform supports ITMT
 | |
|  *
 | |
|  * This function is used by the OS to indicate to scheduler that the platform
 | |
|  * is capable of supporting the ITMT feature.
 | |
|  *
 | |
|  * The current scheme has the pstate driver detects if the system
 | |
|  * is ITMT capable and call sched_set_itmt_support.
 | |
|  *
 | |
|  * This must be done only after sched_set_itmt_core_prio
 | |
|  * has been called to set the cpus' priorities.
 | |
|  * It must not be called with cpu hot plug lock
 | |
|  * held as we need to acquire the lock to rebuild sched domains
 | |
|  * later.
 | |
|  *
 | |
|  * Return: 0 on success
 | |
|  */
 | |
| int sched_set_itmt_support(void)
 | |
| {
 | |
| 	mutex_lock(&itmt_update_mutex);
 | |
| 
 | |
| 	if (sched_itmt_capable) {
 | |
| 		mutex_unlock(&itmt_update_mutex);
 | |
| 		return 0;
 | |
| 	}
 | |
| 
 | |
| 	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
 | |
| 	if (!itmt_sysctl_header) {
 | |
| 		mutex_unlock(&itmt_update_mutex);
 | |
| 		return -ENOMEM;
 | |
| 	}
 | |
| 
 | |
| 	sched_itmt_capable = true;
 | |
| 
 | |
| 	sysctl_sched_itmt_enabled = 1;
 | |
| 
 | |
| 	x86_topology_update = true;
 | |
| 	rebuild_sched_domains();
 | |
| 
 | |
| 	mutex_unlock(&itmt_update_mutex);
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * sched_clear_itmt_support() - Revoke platform's support of ITMT
 | |
|  *
 | |
|  * This function is used by the OS to indicate that it has
 | |
|  * revoked the platform's support of ITMT feature.
 | |
|  *
 | |
|  * It must not be called with cpu hot plug lock
 | |
|  * held as we need to acquire the lock to rebuild sched domains
 | |
|  * later.
 | |
|  */
 | |
| void sched_clear_itmt_support(void)
 | |
| {
 | |
| 	mutex_lock(&itmt_update_mutex);
 | |
| 
 | |
| 	if (!sched_itmt_capable) {
 | |
| 		mutex_unlock(&itmt_update_mutex);
 | |
| 		return;
 | |
| 	}
 | |
| 	sched_itmt_capable = false;
 | |
| 
 | |
| 	if (itmt_sysctl_header) {
 | |
| 		unregister_sysctl_table(itmt_sysctl_header);
 | |
| 		itmt_sysctl_header = NULL;
 | |
| 	}
 | |
| 
 | |
| 	if (sysctl_sched_itmt_enabled) {
 | |
| 		/* disable sched_itmt if we are no longer ITMT capable */
 | |
| 		sysctl_sched_itmt_enabled = 0;
 | |
| 		x86_topology_update = true;
 | |
| 		rebuild_sched_domains();
 | |
| 	}
 | |
| 
 | |
| 	mutex_unlock(&itmt_update_mutex);
 | |
| }
 | |
| 
 | |
| int arch_asym_cpu_priority(int cpu)
 | |
| {
 | |
| 	return per_cpu(sched_core_priority, cpu);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 | |
|  * @prio:	Priority of cpu core
 | |
|  * @core_cpu:	The cpu number associated with the core
 | |
|  *
 | |
|  * The pstate driver will find out the max boost frequency
 | |
|  * and call this function to set a priority proportional
 | |
|  * to the max boost frequency. CPU with higher boost
 | |
|  * frequency will receive higher priority.
 | |
|  *
 | |
|  * No need to rebuild sched domain after updating
 | |
|  * the CPU priorities. The sched domains have no
 | |
|  * dependency on CPU priorities.
 | |
|  */
 | |
| void sched_set_itmt_core_prio(int prio, int core_cpu)
 | |
| {
 | |
| 	int cpu, i = 1;
 | |
| 
 | |
| 	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
 | |
| 		int smt_prio;
 | |
| 
 | |
| 		/*
 | |
| 		 * Ensure that the siblings are moved to the end
 | |
| 		 * of the priority chain and only used when
 | |
| 		 * all other high priority cpus are out of capacity.
 | |
| 		 */
 | |
| 		smt_prio = prio * smp_num_siblings / i;
 | |
| 		per_cpu(sched_core_priority, cpu) = smt_prio;
 | |
| 		i++;
 | |
| 	}
 | |
| }
 | 
