我正在编写Boyer Moore算法的一个更简单的版本,我需要使用循环缓冲区,因为可能会有非常大的输入.程序应写入已与模式进行比较的符号的所有位置.但当我将程序提交给GitLab服务器时,它没有通过测试.这些测试判断程序中的未定义行为,因此它们失败的原因非常明显.然而,我找不到UB.唯一的提示是一条错误消息,但我不知道如何使用这些字节地址来解决我的问题.另外,如果"pat"只是一个基本字符串,它怎么会下溢呢?

#include <stdio.h>

#define MAX_PATTERN_LEN 16
#define BUF_SIZE 16
#define ALPH_SIZE 256

void calculate_shifts(const unsigned char *str, int len_str, int *shifts) {
    for (int i = 0; i < ALPH_SIZE; i++) 
        shifts[i] = len_str;
    for (int i = 0; i < len_str - 1; i++) 
        shifts[str[i]] = len_str - 1 - i;
}

int read_str(int max_len, unsigned char *str_place, int *is_patread, int *ind_EOF) {
    int count_read = 0;
    int ch = EOF;
    for (int i = 0; i < max_len; i++) {
        ch = getchar();
        if (ch == EOF) {
            if (!(*ind_EOF)) 
                *ind_EOF = 1; 
            break;

        }
        if ((ch == '\n') && (*is_patread == 0)) {
            *is_patread = 1;
            break;
        }
        str_place[i] = (char) ch;
        count_read++;
    }
    return count_read;
}

typedef struct {
    unsigned char *buf;
    int idxIn;
    int idxOut;
    int size;
} Ring_buf;

void Ring_put(Ring_buf *ring, unsigned char symbol) {
    ring->buf[ring->idxIn++] = symbol;
    if (ring->idxIn >= ring->size) 
        ring->idxIn = 0;
}

void Ring_push_idxOut(Ring_buf *ring, int jump) {
    int new_indxOut = ring->idxOut + jump;
    if (new_indxOut >= ring->size) new_indxOut -= ring->size;
    ring->idxOut = new_indxOut;
}

void Ring_init(Ring_buf *ring, unsigned char *buf, int size) {
    ring->size = size;
    ring->buf = buf;
    ring->idxIn = 0;
    ring->idxOut = 0;
}

unsigned char Ring_showch(const Ring_buf *ring, int idx) {
    int actual_idx = ring->idxOut + idx;
    if (actual_idx >= ring->size) 
        actual_idx -= ring->size;
    return ring->buf[actual_idx];
}

int Ring_put_nchars(Ring_buf *ring, int num, int ind_EOF) {
    if (ind_EOF) return 0;
    int count_read = 0;
    int ch = EOF;
    for (int i = 0; i < num; i++) {
        ch = getchar();
        if (ch == EOF)
            break;
        Ring_put(ring, ch);
        count_read++;
    }
    return (count_read == num);
}

void search_substr(Ring_buf *ring, const unsigned char *pat, int pat_len, int ind_EOF) {
    int shift = 0, local_shift = 0, shift_addition = 0, shifts[ALPH_SIZE];
    calculate_shifts(pat, pat_len, shifts);
    for (;;) {
        while (local_shift <= (ring->size - pat_len)) {
            int j = pat_len - 1;
            for (; (Ring_showch(ring, local_shift + j) == pat[j]) && (j >= 0) ; j--) 
                printf("%d ", shift + local_shift + j + 1);
            if (j < 0) {
                shift_addition = shifts[pat[pat_len - 1]];
                local_shift += shift_addition;
            } else {
                printf("%d ", shift + local_shift + j + 1);
                shift_addition = shifts[Ring_showch(ring, local_shift + j)];
                if (j == 0) 
                    shift_addition = pat_len;
                if ((shift_addition == pat_len) && (j < pat_len - 1) && (pat[pat_len - 1] == pat[0])) 
                    shift_addition--;
                local_shift += shift_addition;
            }
        }
        int req_addplace = pat_len - 1 - (ring->size - 1 - local_shift);
        shift += req_addplace;
        if (!Ring_put_nchars(ring, req_addplace, ind_EOF))
            break;
        Ring_push_idxOut(ring, req_addplace);
        local_shift -= req_addplace;
    }
}

int main(void) {
    unsigned char pat[MAX_PATTERN_LEN + 1] = {'\0'};
    unsigned char str[BUF_SIZE] = {'\0'};
    int is_patread = 0, ind_EOF = 0;
    int pat_len = read_str(MAX_PATTERN_LEN + 1, pat, &is_patread, NULL);
    int str_len = read_str(BUF_SIZE, str, &is_patread, &ind_EOF);
    if (!str_len || !pat_len)
        return 0;
    Ring_buf ring;
    Ring_init(&ring, str, str_len);
    search_substr(&ring, pat, pat_len, ind_EOF);
    return 0;
}

input: example\nthis is simple example

output: 7, 14, 13, 12, 11, 10, 20, 22, 21, 20, 19, 18, 17, 16

the error message:
==284==ERROR: AddressSanitizer: stack-buffer-underflow on address 0x7ffd3cdb915f at pc 0x00000040166d bp 0x7ffd3cdb8b90 sp 0x7ffd3cdb8b80
READ of size 1 at 0x7ffd3cdb915f thread T0
    #0 0x40166c in search_substr /builds/J6i_AJma/0/c_programming_autumn/23203/i.bogachenkov/template/lab1-0/src/main.c:87
    #1 0x40187b in main /builds/J6i_AJma/0/c_programming_autumn/23203/i.bogachenkov/template/lab1-0/src/main.c:121
    #2 0x7f068531ab96 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x21b96)
    #3 0x400b69 in _start (/builds/J6i_AJma/0/c_programming_autumn/23203/i.bogachenkov/template/build_lab1-0/lab1-0+0x400b69)
Address 0x7ffd3cdb915f is located in stack of thread T0 at offset 31 in frame
    #0 0x4016af in main /builds/J6i_AJma/0/c_programming_autumn/23203/i.bogachenkov/template/lab1-0/src/main.c:111
  This frame has 5 object(s):
    [32, 49) 'pat:112' <== Memory access at offset 31 underflows this variable
    [96, 112) 'str:113'
    [128, 132) 'is_patread:114'
    [144, 148) 'ind_EOF:114'
    [160, 184) 'ring:119'
HINT: this may be a false positive if your program uses some custom stack unwind mechanism or swapcontext
      (longjmp and C++ exceptions *are* supported)
SUMMARY: AddressSanitizer: stack-buffer-underflow /builds/J6i_AJma/0/c_programming_autumn/23203/i.bogachenkov/template/lab1-0/src/main.c:87 in search_substr
Shadow bytes around the buggy address:
  0x1000279af1d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000279af1e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000279af1f0: 00 00 00 00 00 00 00 00 f3 f3 f3 f3 f3 f3 f3 f3
  0x1000279af200: f3 f3 f3 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00
  0x1000279af210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
=>0x1000279af220: 00 00 00 00 00 00 00 00 f1 f1 f1[f1]00 00 01 f2
  0x1000279af230: f2 f2 f2 f2 00 00 f2 f2 04 f2 04 f2 00 00 00 f3
  0x1000279af240: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000279af250: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000279af260: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x1000279af270: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==284==ABORTING

推荐答案

  1. main()/read_str():当ind_EOF被取消引用时,int pat_len = read_str(MAX_PATTERN_LEN, pat, &is_patread, NULL)是未定义的行为(段错误).要么将其视为错误,要么仅取消引用ind_EOF(如果不是NULL):

            if(ind_EOF)
                *ind_EOF = 1;
    
  2. read_str():更喜欢初始化out参数而不是依赖于caller来做(在循环之前):

    if(ind_EOF)
       *ind_EOF = 0;
    
  3. read_str():如果成功,OUT参数str_place应该是一个字符串,即'\0'终止.最小的修复方法是传入max_len个比数组大小小1的数组,因为您已经将输入字符串初始化为零:

    unsigned char pat[MAX_PATTERN_LEN + 1] = {0};
    unsigned char str[BUF_SIZE + 1] = {0};
    int is_patread = 0, ind_EOF = 0;
    int pat_len = read_str(MAX_PATTERN_LEN, pat, &is_patread, NULL);
    int str_len = read_str(BUF_SIZE, str, &is_patread, &ind_EOF);
    
  4. read_str():考虑使用fgets()并删除is_patreadind_EOF参数,因为只有第二个调用方使用ind_EOF.

    size_t read_str(size_t n, char *s) {
        return fgets(s, n, stdin) ? strlen(s) : 0;
    }
    

    ...如果你知道你会返回,就不要从用户那里读取更多的输入(!pat_len):

    int main(void) {
        char pat[MAX_PATTERN_LEN];
        size_t pat_len = read_str(MAX_PATTERN_LEN, pat);
        if(!pat_len) return 0;
    
        char str[BUF_SIZE];
        size_t str_len = read_str(BUF_SIZE, str);
        if(!str_len) return 0;
        int ind_EOF = feof(stdin);
        // ...
    
  5. search_substr():在使用它之前判断j是否在预期范围内(即交换控制表达式的两个表达式的顺序):

            for (; ((j >= 0) && Ring_showch(ring, local_shift + j) == pat[j]); j--)
    
  6. Ring_put_nchars():在示例中输入调用ch = getchar()中的块.我不知道这是否是预期的行为,但读入一个旨在编写的函数是很奇怪的.

  7. 你不用(unsigned char),这有点奇怪.

C++相关问答推荐

为什么可以通过指向常量int的指针间接地改变整数的值?

如果我释放其他内容,返回值就会出错

Clang:如何强制运行时错误的崩溃/异常由于-fsanitize=undefined

向上强制转换C中的数值类型总是可逆的吗?

双指针指向常量双指针的指针类型赋值不兼容

Char变量如何在不使用方括号或花括号的情况下存储字符串,以及它如何迭代到下一个字符?

Sizeof(&Q;字符串&Q;)的正确输出是什么?

FRIDA-服务器成为端口扫描的目标?

C指针概念分段故障

如何在VS 2022中正确安装额外的C头文件

预处理器宏扩展(ISO/IEC 9899:1999(E)§;6.10.3.5示例3)

安全倒计时循环

C程序向服务器发送TCPRST

在运行时判断C/C++指针是否指向只读内存(在Linux操作系统中)

用于计算位数和的递归C函数

如何在不更改格式说明符的情况下同时支持双精度和长双精度?

既然我们在 if 中将 int 的值更改为 10,为什么在第二个 fork 后,子进程及其创建的子进程都会打印 33 ?

保存有符号整数结果的变量是否会溢出(后增量的副作用),并且此后从未在任何表达式中使用过它,是否会导致 UB?

clion.我无法理解 Clion 中发生的 scanf 错误

Codewars Kata 掷骰子的不稳定行为