bsdiff.c (7951B)
1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright 2003-2005 Colin Percival 5 * Copyright 2011-2012 Thieu Le 6 * Copyright 2012 Matthew Endsley 7 * Copyright 2020 Stephen Gregoratto 8 * All rights reserved 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted providing that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 28 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #if 0 33 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $"); 34 #endif 35 36 #include "config.h" 37 #include <sys/stat.h> 38 39 #include <bzlib.h> 40 #if HAVE_ERR 41 #include <err.h> 42 #endif 43 #include <fcntl.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #if HAVE_PLEDGE && HAVE_UNVEIL 48 #include <unistd.h> 49 #endif 50 51 #include "bsdiff.h" 52 53 static off_t 54 bz_write(FILE *f, void *buf, size_t size) 55 { 56 int bzerr; 57 BZFILE *bzf; 58 unsigned int out; 59 60 if ((bzf = BZ2_bzWriteOpen(&bzerr, f, 9, 0, 0)) == NULL) 61 errx(1, "BZ2_bzWriteOpen returned %d", bzerr); 62 BZ2_bzWrite(&bzerr, bzf, buf, size); 63 if (bzerr != BZ_OK) 64 errx(1, "BZ2_bzWrite returned %d", bzerr); 65 BZ2_bzWriteClose(&bzerr, bzf, 0, NULL, &out); 66 if (bzerr != BZ_OK) 67 errx(1, "BZ2_bzWriteClose returned %d", bzerr); 68 69 return (off_t)out; 70 } 71 72 int 73 main(int argc, char **argv) 74 { 75 uint8_t *old, *new; 76 off_t oldsize, newsize; 77 off_t *I, *V; 78 off_t scan, pos, len, filelen; 79 off_t lastscan, lastpos, lastoffset; 80 off_t oldscore, scsc; 81 off_t s, Sf, lenf, Sb, lenb; 82 off_t overlap, Ss, lens; 83 off_t i; 84 off_t dblen; 85 uint8_t *dbuf; 86 uint8_t ctrlbuf[24]; 87 struct bsdiff_header header = { "BSDIFF40", 0, 0, 0 }; 88 FILE *pf; 89 90 if (argc != 4) 91 usage(); 92 93 #if HAVE_PLEDGE && HAVE_UNVEIL 94 if (pledge("stdio rpath wpath cpath unveil", NULL) == -1) 95 err(1, "pledge"); 96 if (unveil(argv[1], "r") == -1) 97 err(1, "unveil(%s, r)", argv[1]); 98 if (unveil(argv[2], "r") == -1) 99 err(1, "unveil(%s, r)", argv[2]); 100 if (unveil(argv[3], "wc") == -1) 101 err(1, "unveil(%s, wc)", argv[3]); 102 if (pledge("stdio rpath wpath cpath", NULL) == -1) 103 err(1, "pledge"); 104 #endif 105 old = readfile(argv[1], &oldsize, &(struct stat){ 0 }); 106 107 if ((I = reallocarray(NULL, oldsize + 1, sizeof(off_t))) == NULL) 108 err(1, "reallocarray"); 109 if ((V = reallocarray(NULL, oldsize + 1, sizeof(off_t))) == NULL) 110 err(1, "reallocarray"); 111 112 qsufsort(I, V, old, oldsize); 113 114 free(V); 115 116 new = readfile(argv[2], &newsize, &(struct stat){ 0 }); 117 #if HAVE_PLEDGE && HAVE_UNVEIL 118 if (pledge("stdio wpath cpath", NULL) == -1) 119 err(1, "pledge"); 120 #endif 121 /* Create the patch file */ 122 if ((pf = fopen(argv[3], "w")) == NULL) 123 err(1, "fopen(%s)", argv[3]); 124 #if HAVE_PLEDGE && HAVE_UNVEIL 125 if (pledge("stdio", NULL) == -1) 126 err(1, "pledge"); 127 #endif 128 129 if ((dbuf = reallocarray(NULL, newsize + 1, 1)) == NULL) 130 err(1, "reallocarray"); 131 filelen = 0; 132 133 /* 134 * The structure of a bsdiff patch is a 32 byte plaintext header 135 * and three bzip2'd data blocks. In more detail: 136 * 137 * offset size 138 * 0 8 "BSDIFF40" 139 * 8 8 sizeof(bzip2(ctrl block)): X 140 * 16 8 sizeof(bzip2(diff block)): Y 141 * 24 8 sizeof(newfile) 142 * 32 X bzip2(ctrl block) 143 * 32 + X Y bzip2(diff block) 144 * 32 + X + Y ?? bzip2(extra block) 145 * 146 * The ctrl block is a set of triples (x, y, z) that tells the patch 147 * program to: 148 * 149 * - Add x bytes from oldfile to x bytes from the diff block. 150 * - Copy y bytes from the extra block. 151 * - Seek forwards in oldfile by z bytes. 152 */ 153 header.new_file_len = newsize; 154 if (fwrite(&header, 1, sizeof(header), pf) != 32) 155 err(1, "fwrite(%s, %zu)", argv[3], sizeof(header)); 156 filelen += 32; 157 158 /* Compute the differences, writing ctrl as we go */ 159 scan = len = lenf = lenb = lastscan = pos = lastpos = lastoffset = 0; 160 while (scan < newsize) { 161 oldscore = 0; 162 163 /* 164 * If we come across a large block of data that only differs by 165 * less than 8 bytes, this loop takes a long time to go past it. 166 * Track the number of times we're stuck in the block and break 167 * out of it. 168 */ 169 int num_less_than_eight = 0; 170 off_t prev_len, prev_oldscore, prev_pos; 171 for (scsc = scan += len; scan < newsize; scan++) { 172 prev_len = len; 173 prev_oldscore = oldscore; 174 prev_pos = pos; 175 176 len = search(I, old, oldsize, new + scan, newsize - scan, 0, oldsize, &pos); 177 178 for (; scsc < scan + len; scsc++) 179 if (scsc + lastoffset < oldsize && 180 old[scsc + lastoffset] == new[scsc]) 181 oldscore++; 182 183 if ((len == oldscore && len != 0) || len > oldscore + 8) 184 break; 185 186 if (scan + lastoffset < oldsize && old[scan + lastoffset] == new[scan]) 187 oldscore--; 188 189 const off_t fuzz = 8; 190 if (prev_len - fuzz <= len && len <= prev_len && 191 prev_oldscore - fuzz <= oldscore && 192 oldscore <= prev_oldscore && 193 prev_pos <= pos && pos <= prev_pos + fuzz && 194 oldscore <= len && len <= oldscore + fuzz) 195 num_less_than_eight++; 196 else 197 num_less_than_eight = 0; 198 if (num_less_than_eight > 100) 199 break; 200 } 201 202 if (len != oldscore || scan == newsize) { 203 s = Sf = lenf = 0; 204 for (i = 0; lastscan + i < scan && lastpos + i < oldsize;) { 205 if (old[lastpos + i] == new[lastscan + i]) 206 s++; 207 i++; 208 if (s * 2 - i > Sf * 2 - lenf) { 209 Sf = s; 210 lenf = i; 211 } 212 } 213 214 lenb = 0; 215 if (scan < newsize) { 216 s = Sb = 0; 217 for (i = 1; scan >= lastscan + i && pos >= i; i++) { 218 if (old[pos - i] == new[scan - i]) 219 s++; 220 if (s * 2 - i > Sb * 2 - lenb) { 221 Sb = s; 222 lenb = i; 223 } 224 } 225 } 226 227 if (lastscan + lenf > scan - lenb) { 228 overlap = (lastscan + lenf) - (scan - lenb); 229 s = Ss = lens = 0; 230 for (i = 0; i < overlap; i++) { 231 if (new[lastscan + lenf - overlap + i] == 232 old[lastpos + lenf - overlap + i]) 233 s++; 234 if (new[scan - lenb + i] == old[pos - lenb + i]) 235 s--; 236 if (s > Ss) { 237 Ss = s; 238 lens = i + 1; 239 } 240 } 241 242 lenf += lens - overlap; 243 lenb -= lens; 244 } 245 } 246 } 247 /* Compute the size of the blocks and write them */ 248 offtout(lenf, ctrlbuf); 249 offtout((scan - lenb) - (lastscan + lenf), ctrlbuf + 8); 250 offtout((pos - lenb) - (lastpos + lenf), ctrlbuf + 16); 251 filelen += bz_write(pf, ctrlbuf, sizeof(ctrlbuf)); 252 header.ctrl_block_len = filelen - 32; 253 254 dblen = lenf; 255 for (i = 0; i < dblen; i++) 256 dbuf[i] = new[lastscan + i] - old[lastpos + i]; 257 filelen += bz_write(pf, dbuf, dblen); 258 header.diff_block_len = filelen - (header.ctrl_block_len + 32); 259 260 dblen = (scan - lenb) - (lastscan + lenf); 261 for (i = 0; i < dblen; i++) 262 dbuf[i] = new[lastscan + lenf + i]; 263 bz_write(pf, dbuf, dblen); 264 265 /* Seek to the beginning, write the header, and close the file */ 266 if (fseeko(pf, 0, SEEK_SET)) 267 err(1, "fseeko"); 268 if (fwrite(&header, 1, sizeof(header), pf) != 32) 269 err(1, "fwrite(%s)", argv[3]); 270 if (fclose(pf)) 271 err(1, "fclose"); 272 273 /* Free the memory we used */ 274 free(dbuf); 275 free(I); 276 free(old); 277 free(new); 278 279 return 0; 280 }