Rust：为什么对通过 clap::Parser 读取的大数字进行计算，比直接硬编码该数字时更慢？

发布于02月20日

我有一个非常简单的程序，它从命令行获取一个整数参数并执行以下任务:

use clap::Parser ;

// Command-line arguments for the program; `Parser` is derived so that
// `Args::parse()` can build this struct.
// Plain `//` comments are used on purpose: clap's derive macro turns `///`
// doc comments into help text, which would change the program's output.
// NOTE(review): `Default` is derived but no use of it is visible in this listing.
#[derive(Parser,Default)]
struct Args {
    // The integer handed to `sum`. `#[arg(short)]` exposes it as a short flag
    // (`-n` in the sample invocation shown later in the article).
    #[arg(short)]
    number: u128
}

/// Returns the sum of the integer quotients `n / i` for every `i` in `1..n`.
///
/// The upper bound is exclusive, so the `n / n` term is not included and the
/// result is `0` whenever `n` is `0` or `1`. All arithmetic is done in `u128`.
fn sum(n: u128) -> u128 {
    (1..n).map(|divisor| n / divisor).sum()
}

fn main() {
    let args = Args::parse() ;
    println!("{}", sum(args.number)) ;
}

很简单，对吧？嗯，当对"大"数字执行它时，比如说999999999，在我的机器上几乎需要10秒才能得到结果.

$ time ./target/release/main -n 999999999
20877697533

real    0m9.442s
user    0m9.370s
sys     0m0.030s

但是，如果我完全不使用 clap，而是直接硬编码这个值：

/// Returns the sum of the integer quotients `n / i` for every `i` in `1..n`.
///
/// The upper bound is exclusive, so the `n / n` term is not included and the
/// result is `0` whenever `n` is `0` or `1`. All arithmetic is done in `u128`.
fn sum(n: u128) -> u128 {
    (1..n).map(|divisor| n / divisor).sum()
}

/// Entry point: prints `sum` of a value fixed in the source instead of one
/// read from the command line.
fn main() {
    // Same input as the command-line run, written as a literal.
    let limit: u128 = 999_999_999;
    let total = sum(limit);
    println!("{}", total);
}

执行时间降至两秒左右

$ time ./target/release/main
20877697533

real    0m2.398s
user    0m2.236s
sys     0m0.013s

那么，是什么让使用 clap 的版本如此缓慢呢？

下面依次是两个版本中循环部分生成的汇编：先是 clap 版本，然后是硬编码版本。

xorl %esi, %esi cmpq $2, %r15 movq %rdx, %rax sbbq $0, %rax movl $0, %r13d jb .LBB5_9 movq %r15, %rsi addq $-1, %rsi movq %rdx, %rdi adcq $-1, %rdi movq %r15, %rax addq $-2, %rax movq %rdx, %rcx adcq $-1, %rcx movq %rsi, 32(%rsp) andl $3, %esi movq %rsi, 40(%rsp) cmpq $3, %rax sbbq $0, %rcx movq %rdx, 8(%rsp) jae .LBB5_4 xorl %ebp, %ebp movl $1, %edx xorl %r13d, %r13d xorl %ecx, %ecx jmp .LBB5_6 .LBB5_4: andq $-4, 32(%rsp) movl $1, %edx xorl %ebp, %ebp xorl %r13d, %r13d xorl %ecx, %ecx xorl %esi, %esi xorl %ebx, %ebx movq %rdi, 64(%rsp) .LBB5_5: movq %rbx, 88(%rsp) movq %rsi, 96(%rsp) movq %rcx, 16(%rsp) movq %rdx, (%rsp) addq $1, %rdx movq %rdx, 24(%rsp) movq %rcx, %rbx adcq $0, %rbx movq %r15, %rdi movq 8(%rsp), %rsi movq (%rsp), %rdx movq 16(%rsp), %rcx movq __udivti3@GOTPCREL(%rip), %r14 callq *%r14 movq %r14, %r8 movq %rax, %r14 movq %rdx, %r12 addq %rbp, %r14 adcq %r13, %r12 movq (%rsp), %rax addq $2, %rax movq %rax, 80(%rsp) movq 16(%rsp), %rax adcq $0, %rax movq %rax, 72(%rsp) movq %r15, %rdi movq 8(%rsp), %rsi movq 24(%rsp), %rdx movq %rbx, %rcx movq %r8, %rbx callq *%r8 movq %rbx, %r8 movq %rax, %rbx movq %r15, %r13 movq %rdx, %rbp addq %r14, %rbx adcq %r12, %rbp movq (%rsp), %rax addq $3, %rax movq %rax, 24(%rsp) movq 16(%rsp), %r15 adcq $0, %r15 movq %r13, %rdi movq 8(%rsp), %rsi movq 80(%rsp), %rdx movq 72(%rsp), %rcx movq %r8, %r14 callq *%r8 movq %r14, %r8 movq %rax, %r12 movq %rdx, %r14 addq %rbx, %r12 adcq %rbp, %r14 addq $4, (%rsp) adcq $0, 16(%rsp) movq %r13, %rdi movq 8(%rsp), %rsi movq 24(%rsp), %rdx movq %r15, %rcx movq %r13, %r15 callq *%r8 movq 88(%rsp), %rbx movq 96(%rsp), %rsi movq 64(%rsp), %rdi movq %rax, %rbp movq %rdx, %r13 movq (%rsp), %rdx addq %r12, %rbp adcq %r14, %r13 addq $4, %rsi adcq $0, %rbx movq %rsi, %rax xorq 32(%rsp), %rax movq %rbx, %rcx xorq %rdi, %rcx orq %rax, %rcx movq 16(%rsp), %rcx jne .LBB5_5 .LBB5_6: cmpq $0, 40(%rsp) movq %rbp, %rsi je .LBB5_9 xorl %r12d, %r12d xorl %ebp, %ebp movq %rdx, %rbx movq %rcx, %r14 
.LBB5_8: movq %rsi, (%rsp) addq $1, %rbx adcq $0, %r14 movq %r15, %rdi movq 8(%rsp), %rsi callq *__udivti3@GOTPCREL(%rip) movq (%rsp), %rsi addq %rax, %rsi adcq %rdx, %r13 addq $1, %r12 adcq $0, %rbp movq %r12, %rax xorq 40(%rsp), %rax orq %rbp, %rax movq %rbx, %rdx movq %r14, %rcx jne .LBB5_8

.LBB5_1: movl $999999999, %eax xorl %edx, %edx divl %r8d movl %eax, %r9d addq %rcx, %r9 adcq $0, %rdi addq $2, %r10 adcq $0, %r11 leal 1(%r8), %ecx movl $999999999, %eax xorl %edx, %edx divl %ecx movl %eax, %ecx addq %r9, %rcx adcq $0, %rdi cmpq $999999997, %r8 sbbq $0, %rsi movq %r10, %r8 movq %r11, %rsi jb .LBB5_1 subq $88, %rsp movq %rcx, 24(%rsp) movq %rdi, 32(%rsp) leaq 24(%rsp), %rax movq %rax, 8(%rsp) movq core::fmt::num::<impl core::fmt::Display for u128>::fmt@GOTPCREL(%rip), %rax movq %rax, 16(%rsp) leaq .L__unnamed_2(%rip), %rax movq %rax, 56(%rsp) movq $2, 64(%rsp) movq $0, 40(%rsp) leaq 8(%rsp), %rax movq %rax, 72(%rsp) movq $1, 80(%rsp) leaq 40(%rsp), %rdi callq *std::io::stdio::_print@GOTPCREL(%rip) addq $88, %rsp retq

Rust：为什么对通过 clap::Parser 读取的大数字进行计算，比直接硬编码该数字时更慢？

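推荐答案

从上面的汇编可以看出差别：在硬编码版本中，编译器在编译期就知道 n = 999999999，这个值可以放进 32 位，因此循环里直接使用硬件的 32 位除法指令 `divl`；而在 clap 版本中，n 是运行时才知道的 u128，每次除法都要调用软件实现的 128 位除法例程 `__udivti3`，所以慢得多。如果输入不需要 128 位的取值范围，把参数类型改为 u64（或 u32），运行时读取输入的版本同样可以使用硬件除法指令。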

Rust相关问答推荐

Rust，polars CSV：有没有一种方法可以从 impl BufRead(或任何字节迭代器)中读取CSV？

重新导出proc宏导致未解决的extern crate错误""

包含嵌套 struct 的CSV

crossterm 未正确清除屏幕

rust 蚀生命周期行为

下载压缩文件

防止cargo test 中的竞争条件

Tokio_Postgres行上未显示退回特性的生存期，且生命周期不够长

根据填充系数以相对大小在给定空间中布局项目

关于使用 flat_map 函数时借用检查器的问题

tokio::sync::broadcast::Receiver 没有实现 Clone

如何在 Rust 中将 Vec> 转换为 Vec>？

仅在运行测试时生成调试输出

为什么1..=100返回一个范围而不是一个整数？

字符串切片的向量超出范围但原始字符串仍然存在，为什么判断器说有错误？

使用 `.` 将 T 转换为 &mut T？

使用 rust-sqlx/tokio 时如何取消长时间运行的查询

为什么 Rust 标准库同时为 Thing 和 &Thing 实现特征？

如何从 Rust 应用程序连接到 Docker 容器中的 SurrealDB？

这个 match 语句的默认值应该是什么，还有一种方法可以解开 Some case (chess in rust)