当前位置 博文首页 > Linux中计算特定CPU使用率案例详解

    Linux中计算特定CPU使用率案例详解

    作者:ibless 时间:2021-09-02 18:02

    Linux中计算特定CPU使用率:需求、解决方案、拓展、参考

    需求

    在Linux中可以通过top指令查看某一进程占用的CPU情况,也可以查看某一个CPU使用率情况(先top指令,然后按数字“1”键即可显示每一个CPU的使用情况),如下图:

    查看CPU使用情况

    而我们的需求是:如何得到一个CPU的占用率呢?

    解决方案

    1. 背景知识

    在/proc/stat中可以查看每一个CPU的使用情况,如下图:

    查看/proc/stat

    其中cpu(0/1/2/…)后面的那十个数字含义如下:

    /proc/stat
    kernel/system statistics.  Varies with architecture.  
    Common entries include:
    
         user nice system idle iowait  irq  softirq steal guest guest_nice
    cpu  4705 356  584    3699   23    23     0       0     0        0
    cpu0 1393280 32966 572056 13343292 6130 0 17875 0 23933 0
       The amount of time, measured in units of USER_HZ
       (1/100ths of a second on most architectures, use
       sysconf(_SC_CLK_TCK) to obtain the right value), that
       the system ("cpu" line) or the specific CPU ("cpuN"
       line) spent in various states:
    
       user   (1) Time spent in user mode.
    
       nice   (2) Time spent in user mode with low priority
              (nice).
    
       system (3) Time spent in system mode.
    
       idle   (4) Time spent in the idle task.  This value
              should be USER_HZ times the second entry in the
              /proc/uptime pseudo-file.
    
       iowait (since Linux 2.5.41)
              (5) Time waiting for I/O to complete.  This
              value is not reliable, for the following rea‐
              sons:
    
              1. The CPU will not wait for I/O to complete;
                 iowait is the time that a task is waiting for
                 I/O to complete.  When a CPU goes into idle
                 state for outstanding task I/O, another task
                 will be scheduled on this CPU.
    
              2. On a multi-core CPU, the task waiting for I/O
                 to complete is not running on any CPU, so the
                 iowait of each CPU is difficult to calculate.
    
              3. The value in this field may decrease in cer‐
                 tain conditions.
    
       irq (since Linux 2.6.0-test4)
              (6) Time servicing interrupts.
    
       softirq (since Linux 2.6.0-test4)
              (7) Time servicing softirqs.
    
       steal (since Linux 2.6.11)
              (8) Stolen time, which is the time spent in
              other operating systems when running in a virtu‐
              alized environment
    
       guest (since Linux 2.6.24)
              (9) Time spent running a virtual CPU for guest
              operating systems under the control of the Linux
              kernel.
    
       guest_nice (since Linux 2.6.33)
              (10) Time spent running a niced guest (virtual
              CPU for guest operating systems under the con‐
              trol of the Linux kernel).
    

    2.计算具体CPU使用率

    有了上面的背景知识,接下来我们就可以计算具体CPU的使用情况了。具体计算方式如下:

    Total CPU time since boot = user+nice+system+idle+iowait+irq+softirq+steal
    Total CPU Idle time since boot = idle + iowait
    Total CPU usage time since boot = Total CPU time since boot - Total CPU Idle time since boot
    Total CPU percentage = Total CPU usage time since boot/Total CPU time since boot * 100%
    

    有了上面的计算公式,计算某一CPU的使用率或者系统总的CPU占用率也就不难了。
    示例:计算系统整体CPU占用情况
    首先从/proc/stat中获取t1时刻系统总体的user、nice、system、idle、iowait、irq、softirq、steal、guest、guest_nice的值(其中guest、guest_nice已分别计入user、nice,按上面的公式求和时不再重复累加),得到此时Total CPU time since boot(记为total1)和 Total CPU idle time since boot(记为idle1)。
    其次,从/proc/stat中获取t2时刻系统总的Total CPU time since boot(记为total2)和Total CPU idle time since boot(记为idle2)。(方法同上一步)
    最后,计算t1与t2之间系统总的CPU使用情况。也就是:
    CPU percentage between t1 and t2 = ((total2-total1)-(idle2-idle1))/(total2-total1)* 100%
    其中, ((total2-total1)-(idle2-idle1))实际上就是t1与t2时刻之间系统CPU被占用的时间(总时间 - 空闲时间)。
    下面是一段计算时间段内CPU被占用情况的脚本:

    #!/bin/bash
    # by Paul Colby (http://colby.id.au), no rights reserved ;)
    #
    # Prints the overall CPU usage once per second, computed from the
    # aggregate "cpu" line of /proc/stat:
    #   total = user + nice + system + idle + iowait + irq + softirq + steal
    #   idle  = idle + iowait
    #   usage = (delta total - delta idle) / delta total
    # guest and guest_nice are deliberately left out of the total: the kernel
    # already accounts guest time in user and guest_nice time in nice, so
    # adding them again would count that time twice.

    PREV_TOTAL=0
    PREV_IDLE=0

    while true; do
      # Get the total CPU statistics, discarding the 'cpu ' prefix.
      # [[:space:]] is used instead of \s, which is a GNU sed extension.
      CPU=($(sed -n 's/^cpu[[:space:]]//p' /proc/stat))

      # Idle time is idle (4th column) plus iowait (5th column); iowait
      # defaults to 0 if the column is missing.
      IDLE=$(( ${CPU[3]} + ${CPU[4]:-0} ))

      # Total CPU time: only the first eight columns (user .. steal).
      TOTAL=0
      for VALUE in "${CPU[@]:0:8}"; do
        TOTAL=$(( TOTAL + VALUE ))
      done

      # Calculate the CPU usage since we last checked. On the first pass the
      # previous values are 0, so the figure shown is the average since boot.
      DIFF_IDLE=$(( IDLE - PREV_IDLE ))
      DIFF_TOTAL=$(( TOTAL - PREV_TOTAL ))

      # Skip the update if no time elapsed between samples; this avoids a
      # division by zero.
      if [ "$DIFF_TOTAL" -gt 0 ]; then
        # Work in tenths of a percent and add 5 to round to the nearest percent.
        DIFF_USAGE=$(( (1000 * (DIFF_TOTAL - DIFF_IDLE) / DIFF_TOTAL + 5) / 10 ))
        echo -en "\rCPU: $DIFF_USAGE%  \b\b"
      fi

      # Remember the total and idle CPU times for the next check.
      PREV_TOTAL="$TOTAL"
      PREV_IDLE="$IDLE"

      # Wait before checking again.
      sleep 1
    done
    

    拓展

    在内核中,/proc/stat文件的实现代码如下:

    附注:内核版本3.14.69,文件为 /fs/proc/stat.c
    
    #include <linux/cpumask.h>
    #include <linux/fs.h>
    #include <linux/init.h>
    #include <linux/interrupt.h>
    #include <linux/kernel_stat.h>
    #include <linux/proc_fs.h>
    #include <linux/sched.h>
    #include <linux/seq_file.h>
    #include <linux/slab.h>
    #include <linux/time.h>
    #include <linux/irqnr.h>
    #include <asm/cputime.h>
    #include <linux/tick.h>
    
    /*
     * Fallback definitions used when arch_irq_stat_cpu()/arch_irq_stat() have
     * not already been defined (presumably by architecture headers). Both
     * expand to 0, so they contribute nothing to the interrupt total that
     * show_stat() accumulates.
     */
    #ifndef arch_irq_stat_cpu
    #define arch_irq_stat_cpu(cpu) 0
    #endif
    #ifndef arch_irq_stat
    #define arch_irq_stat() 0
    #endif
    
    #ifdef arch_idle_time
    
    /*
     * Idle time of @cpu when the architecture provides arch_idle_time().
     *
     * Starts from the CPUTIME_IDLE counter and adds arch_idle_time(cpu) only
     * when the CPU is online and nr_iowait_cpu(cpu) is zero.
     */
    static cputime64_t get_idle_time(int cpu)
    {
    	cputime64_t total = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];

    	/* Offline CPU, or iowait pending on it: counter is used as is. */
    	if (!cpu_online(cpu) || nr_iowait_cpu(cpu))
    		return total;

    	return total + arch_idle_time(cpu);
    }
    
    /*
     * I/O wait time of @cpu when the architecture provides arch_idle_time().
     *
     * Starts from the CPUTIME_IOWAIT counter and adds arch_idle_time(cpu) only
     * when the CPU is online and nr_iowait_cpu(cpu) is non-zero.
     */
    static cputime64_t get_iowait_time(int cpu)
    {
    	cputime64_t total = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];

    	/* Offline CPU, or no iowait pending on it: counter is used as is. */
    	if (!cpu_online(cpu) || !nr_iowait_cpu(cpu))
    		return total;

    	return total + arch_idle_time(cpu);
    }
    
    #else
    
    /*
     * Idle time of @cpu when arch_idle_time() is not available.
     *
     * For an online CPU the value from get_cpu_idle_time_us() is preferred and
     * converted with usecs_to_cputime64(). If the CPU is offline, or that call
     * yields -1ULL, the CPUTIME_IDLE counter is returned instead.
     */
    static u64 get_idle_time(int cpu)
    {
    	u64 usecs = -1ULL;

    	if (cpu_online(cpu))
    		usecs = get_cpu_idle_time_us(cpu, NULL);

    	if (usecs != -1ULL)
    		return usecs_to_cputime64(usecs);

    	/* !NO_HZ or cpu offline so we can rely on cpustat.idle */
    	return kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE];
    }
    
    /*
     * I/O wait time of @cpu when arch_idle_time() is not available.
     *
     * For an online CPU the value from get_cpu_iowait_time_us() is preferred
     * and converted with usecs_to_cputime64(). If the CPU is offline, or that
     * call yields -1ULL, the CPUTIME_IOWAIT counter is returned instead.
     */
    static u64 get_iowait_time(int cpu)
    {
    	u64 usecs = -1ULL;

    	if (cpu_online(cpu))
    		usecs = get_cpu_iowait_time_us(cpu, NULL);

    	if (usecs != -1ULL)
    		return usecs_to_cputime64(usecs);

    	/* !NO_HZ or cpu offline so we can rely on cpustat.iowait */
    	return kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT];
    }
    
    #endif
    
    static int show_stat(struct seq_file *p, void *v)
    {
    	int i, j;
    	unsigned long jif;
    	u64 user, nice, system, idle, iowait, irq, softirq, steal;
    	u64 guest, guest_nice;
    	u64 sum = 0;
    	u64 sum_softirq = 0;
    	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
    	struct timespec boottime;
    
    	user = nice = system = idle = iowait =
    		irq = softirq = steal = 0;
    	guest = guest_nice = 0;
    	getboottime(&boottime);
    	jif = boottime.tv_sec;
    
    	for_each_possible_cpu(i) {
    		user += kcpustat_cpu(i).cpustat[CPUTIME_USER];
    		nice += kcpustat_cpu(i).cpustat[CPUTIME_NICE];
    		system += kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
    		idle += get_idle_time(i);
    		iowait += get_iowait_time(i);
    		irq += kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
    		softirq += kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
    		steal += kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
    		guest += kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
    		guest_nice += kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
    		sum += kstat_cpu_irqs_sum(i);
    		sum += arch_irq_stat_cpu(i);
    
    		for (j = 0; j < NR_SOFTIRQS; j++) {
    			unsigned int softirq_stat = kstat_softirqs_cpu(j, i);
    
    			per_softirq_sums[j] += softirq_stat;
    			sum_softirq += softirq_stat;
    		}
    	}
    	sum += arch_irq_stat();
    
    	seq_puts(p, "cpu ");
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
    	seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
    	seq_putc(p, '\n');
    
    	for_each_online_cpu(i) {
    		/* Copy values here to work around gcc-2.95.3, gcc-2.96 */
    		user = kcpustat_cpu(i).cpustat[CPUTIME_USER];
    		nice = kcpustat_cpu(i).cpustat[CPUTIME_NICE];
    		system = kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM];
    		idle = get_idle_time(i);
    		iowait = get_iowait_time(i);
    		irq = kcpustat_cpu(i).cpustat[CPUTIME_IRQ];
    		softirq = kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ];
    		steal = kcpustat_cpu(i).cpustat[CPUTIME_STEAL];
    		guest = kcpustat_cpu(i).cpustat[CPUTIME_GUEST];
    		guest_nice = kcpustat_cpu(i).cpustat[CPUTIME_GUEST_NICE];
    		seq_printf(p, "cpu%d", i);
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(user));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(nice));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(system));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(idle));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(iowait));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(irq));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(softirq));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(steal));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest));
    		seq_put_decimal_ull(p, ' ', cputime64_to_clock_t(guest_nice));
    		seq_putc(p, '\n');
    	}
    	seq_printf(p, "intr %llu", (unsigned long long)sum);
    
    	/* sum again ? it could be updated? */
    	for_each_irq_nr(j)
    		seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
    
    	seq_printf(p,
    		"\nctxt %llu\n"
    		"btime %lu\n"
    		"processes %lu\n"
    		"procs_running %lu\n"
    		"procs_blocked %lu\n",
    		nr_context_switches(),
    		(unsigned long)jif,
    		total_forks,
    		nr_running(),
    		nr_iowait());
    
    	seq_printf(p, "softirq %llu", (unsigned long long)sum_softirq);
    
    	for (i = 0; i < NR_SOFTIRQS; i++)
    		seq_put_decimal_ull(p, ' ', per_softirq_sums[i]);
    	seq_putc(p, '\n');
    
    	return 0;
    }
    
    /*
     * open() handler for the "stat" proc entry.
     *
     * Allocates the seq_file output buffer up front instead of leaving it to
     * the seq_file code. The requested size is 1024 bytes plus 128 bytes per
     * possible CPU plus 2 bytes per irq, capped at KMALLOC_MAX_SIZE.
     *
     * Returns -ENOMEM if the buffer cannot be allocated, otherwise the result
     * of single_open(). If single_open() fails the buffer is freed again.
     */
    static int stat_open(struct inode *inode, struct file *file)
    {
    	size_t want = 1024 + 128 * num_possible_cpus();
    	struct seq_file *seq;
    	char *mem;
    	int err;

    	/* minimum size to display an interrupt count : 2 bytes */
    	want += 2 * nr_irqs;

    	/* don't ask for more than the kmalloc() max size */
    	if (want > KMALLOC_MAX_SIZE)
    		want = KMALLOC_MAX_SIZE;

    	mem = kmalloc(want, GFP_KERNEL);
    	if (!mem)
    		return -ENOMEM;

    	err = single_open(file, show_stat, NULL);
    	if (err) {
    		kfree(mem);
    		return err;
    	}

    	/* Hand the buffer to the seq_file; record the size ksize() reports. */
    	seq = file->private_data;
    	seq->buf = mem;
    	seq->size = ksize(mem);
    	return 0;
    }
    
    /*
     * File operations for the "stat" proc entry: opening goes through
     * stat_open(), while reading, seeking and release use the seq_file
     * helpers seq_read(), seq_lseek() and single_release().
     */
    static const struct file_operations proc_stat_operations = {
    	.open		= stat_open,
    	.release	= single_release,
    	.read		= seq_read,
    	.llseek		= seq_lseek,
    };
    
    /*
     * Init-time hook that creates the "stat" proc entry (mode 0, NULL parent)
     * backed by proc_stat_operations.
     *
     * The return value of proc_create() is not checked; this function always
     * returns 0.
     */
    static int __init proc_stat_init(void)
    {
    	proc_create("stat", 0, NULL, &proc_stat_operations);
    	return 0;
    }
    /* Hook proc_stat_init() into boot via the fs_initcall() macro. */
    fs_initcall(proc_stat_init);
    

    参考

    http://man7.org/linux/man-pages/man5/proc.5.html

    https://github.com/pcolby/scripts/blob/master/cpu.sh

    https://elixir.bootlin.com/linux/v3.14.69/source/fs/proc/stat.c

    jsjswy