commit 7a54ad3ae5d25aa5349668c8d3d3fa048a8f0733 from: Stefan Sperling date: Sun Sep 20 23:04:57 2020 UTC use buffered stdio if falling back on file i/o; this is much faster commit - 03f497279db41e42ac287a15df9f9e03c4c83fd2 commit + 7a54ad3ae5d25aa5349668c8d3d3fa048a8f0733 blob - 283d70c25b80709512d47b567d0348516692f4f0 blob + ced7b7cda23b7557d6c4109730b7e7a41b0db237 --- diff/diff.c +++ diff/diff.c @@ -35,7 +35,7 @@ __dead void usage(void); int diffreg(char *, char *, bool, bool, int); -int openfile(const char *, char **, struct stat *); +FILE * openfile(const char *, char **, struct stat *); __dead void usage(void) @@ -136,7 +136,7 @@ diffreg(char *file1, char *file2, bool do_patience, bo int context_lines) { char *str1, *str2; - int fd1, fd2; + FILE *f1, *f2; struct stat st1, st2; struct diff_input_info info = { .left_path = file1, @@ -148,10 +148,10 @@ diffreg(char *file1, char *file2, bool do_patience, bo cfg = do_patience ? &diff_config_patience : &diff_config; - fd1 = openfile(file1, &str1, &st1); - fd2 = openfile(file2, &str2, &st2); + f1 = openfile(file1, &str1, &st1); + f2 = openfile(file2, &str2, &st2); - result = diff_main(cfg, fd1, str1, st1.st_size, fd2, str2, st2.st_size, + result = diff_main(cfg, f1, str1, st1.st_size, f2, str2, st2.st_size, ignore_whitespace); #if 0 rc = diff_output_plain(stdout, &info, result); @@ -164,29 +164,29 @@ diffreg(char *file1, char *file2, bool do_patience, bo munmap(str1, st1.st_size); if (str2) munmap(str2, st2.st_size); - close(fd1); - close(fd2); + fclose(f1); + fclose(f2); return rc; } -int +FILE * openfile(const char *path, char **p, struct stat *st) { - int fd; + FILE *f = NULL; - fd = open(path, O_RDONLY); - if (fd == -1) + f = fopen(path, "r"); + if (f == NULL) err(2, "%s", path); - if (fstat(fd, st) == -1) + if (fstat(fileno(f), st) == -1) err(2, "%s", path); #ifndef DIFF_NO_MMAP - *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fd, 0); + *p = mmap(NULL, st->st_size, PROT_READ, MAP_PRIVATE, fileno(f), 0); if (*p == MAP_FAILED) #endif *p = NULL; /* fall back on file I/O */ - return fd; + return f; } blob - 5d87a63c1fcbe06352062094f2bd48b70b86f1d4 blob + 6c36ecdb31c5271095adb923f9de7efe30eb74a9 --- include/diff/diff_main.h +++ include/diff/diff_main.h @@ -94,7 +94,7 @@ struct diff_atom { * "child" structs, atoms_allocated == 0, to indicate that the struct is * referencing a subset of atoms. */ struct diff_data { - int fd; /* if root diff_data and not memory-mapped */ + FILE *f; /* if root diff_data and not memory-mapped */ off_t pos; /* if not memory-mapped */ const uint8_t *data; /* if memory-mapped */ off_t len; @@ -355,8 +355,8 @@ struct diff_config { }; struct diff_result *diff_main(const struct diff_config *config, - int left_fd, const uint8_t *left_data, + FILE *left_f, const uint8_t *left_data, off_t left_len, - int right_fd, const uint8_t *right_data, + FILE *right_f, const uint8_t *right_data, off_t right_len, bool ignore_whitespace); void diff_result_free(struct diff_result *result); blob - 343ee6ed664dd116c5552294d00eade60b6d28c3 blob + e0bebd8db4f63c98c197cd5b1b64ddd23aeee754 --- lib/diff_atomize_text.c +++ lib/diff_atomize_text.c @@ -29,27 +29,30 @@ static int diff_data_atomize_text_lines_fd(struct diff_data *d) { - off_t pos = lseek(d->root->fd, 0, SEEK_SET); + off_t pos = 0; const off_t end = pos + d->len; - unsigned int array_size_estimate = d->len / 50; unsigned int pow2 = 1; + while (array_size_estimate >>= 1) pow2++; ARRAYLIST_INIT(d->atoms, 1 << pow2); + if (fseek(d->root->f, 0L, SEEK_SET) == -1) + return errno; + while (pos < end) { off_t line_end = pos; unsigned int hash = 0; unsigned char buf[512]; - ssize_t r, i; + size_t r, i; struct diff_atom *atom; int eol = 0; while (eol == 0 && line_end < end) { - r = read(d->root->fd, buf, sizeof(buf)); - if (r == -1) + r = fread(buf, sizeof(char), sizeof(buf), d->root->f); + if (r == 0 && ferror(d->root->f)) return errno; i = 0; while (eol == 0 && i < r) { @@ -68,10 +71,10 @@ diff_data_atomize_text_lines_fd(struct diff_data *d) line_end++; /* If that was an '\r', also pull in any following '\n' */ if (line_end < end && eol == '\r') { - if (lseek(d->root->fd, line_end, SEEK_SET) == -1) + if (fseeko(d->root->f, line_end, SEEK_SET) == -1) return errno; - r = read(d->root->fd, buf, 1); - if (r == -1) + r = fread(buf, sizeof(char), sizeof(buf), d->root->f); + if (r == 0 && ferror(d->root->f)) return errno; if (r == 1 && buf[0] == '\n' ) line_end++; @@ -92,7 +95,7 @@ diff_data_atomize_text_lines_fd(struct diff_data *d) /* Starting point for next line: */ pos = line_end; - if (lseek(d->root->fd, pos, SEEK_SET) == -1) + if (fseeko(d->root->f, pos, SEEK_SET) == -1) return errno; } blob - c27b77f944d5bf9f213dc5606d98ded6c92f05e2 blob + d2e422c8e42c1d2acba415753106bfc8ee1086a9 --- lib/diff_main.c +++ lib/diff_main.c @@ -35,13 +35,13 @@ #include "diff_debug.h" static int -read_at(int fd, int at_pos, unsigned char *buf, size_t len) +read_at(FILE *f, off_t at_pos, unsigned char *buf, size_t len) { int r; - if (lseek(fd, at_pos, SEEK_SET) == -1) + if (fseeko(f, at_pos, SEEK_SET) == -1) return errno; - r = read(fd, buf, len); - if (r == -1) + r = fread(buf, sizeof(char), len, f); + if ((r == 0 || r < len) && ferror(f)) return errno; if (r != len) return EIO; @@ -153,7 +153,7 @@ diff_atom_cmp(int *cmp, n_right = MIN(chunksz, remain_right); if (left->at == NULL) { - r = read_at(left->d->root->fd, + r = read_at(left->d->root->f, left->pos + (left->len - remain_left), buf_left, n_left); if (r) { @@ -166,7 +166,7 @@ diff_atom_cmp(int *cmp, } if (right->at == NULL) { - r = read_at(right->d->root->fd, + r = read_at(right->d->root->f, right->pos + (right->len - remain_right), buf_right, n_right); if (r) { @@ -290,11 +290,11 @@ chunk_added: } void -diff_data_init_root(struct diff_data *d, int fd, const uint8_t *data, +diff_data_init_root(struct diff_data *d, FILE *f, const uint8_t *data, unsigned long long len, bool ignore_whitespace) { *d = (struct diff_data){ - .fd = fd, + .f = f, .pos = 0, .data = data, .len = len, @@ -309,7 +309,7 @@ diff_data_init_subsection(struct diff_data *d, struct { struct diff_atom *last_atom = from_atom + atoms_count - 1; *d = (struct diff_data){ - .fd = -1, + .f = NULL, .pos = from_atom->pos, .data = from_atom->at, .len = (last_atom->pos + last_atom->len) - from_atom->pos, @@ -459,8 +459,8 @@ return_rc: struct diff_result * diff_main(const struct diff_config *config, - int left_fd, const uint8_t *left_data, off_t left_len, - int right_fd, const uint8_t *right_data, off_t right_len, + FILE *left_f, const uint8_t *left_data, off_t left_len, + FILE *right_f, const uint8_t *right_data, off_t right_len, bool ignore_whitespace) { struct diff_result *result = malloc(sizeof(struct diff_result)); @@ -468,9 +468,9 @@ diff_main(const struct diff_config *config, return NULL; *result = (struct diff_result){}; - diff_data_init_root(&result->left, left_fd, left_data, left_len, + diff_data_init_root(&result->left, left_f, left_data, left_len, ignore_whitespace); - diff_data_init_root(&result->right, right_fd, right_data, right_len, + diff_data_init_root(&result->right, right_f, right_data, right_len, ignore_whitespace); if (!config->atomize_func) { blob - 143bd26e70afdfe842d69ca968efc0ba504056e8 blob + 745a119ea407c711f457c96474762ca5f849ad2b --- lib/diff_myers.c +++ lib/diff_myers.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include blob - c5f915ddbc636ce240e5a211ed99ee7bb4fc2a3b blob + e93c1e9cc1701fc9a4f1d457d33924914bb20adf --- lib/diff_output.c +++ lib/diff_output.c @@ -28,21 +28,22 @@ static char get_atom_byte(struct diff_atom *atom, off_t off) { - char ch; + int ch; off_t cur; if (atom->at != NULL) return atom->at[off]; - cur = lseek(atom->d->root->fd, 0, SEEK_CUR); + cur = ftello(atom->d->root->f); if (cur == -1) abort(); /* XXX cannot return error */ if (cur != atom->pos + off && - lseek(atom->d->root->fd, atom->pos + off, SEEK_SET) == -1) + fseeko(atom->d->root->f, atom->pos + off, SEEK_SET) == -1) abort(); /* XXX cannot return error */ - if (read(atom->d->root->fd, &ch, sizeof(ch)) == -1) + ch = fgetc(atom->d->root->f); + if (ch == EOF) abort(); /* XXX cannot return error */ return ch;