bsdiff-portable

A more portable version of Colin Percival's bsdiff
git clone git://git.sgregoratto.me/bsdiff-portable
Log | Files | Refs | README | LICENSE

bsdiff.c (7951B)


      1 /*-
      2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
      3  *
      4  * Copyright 2003-2005 Colin Percival
      5  * Copyright 2011-2012 Thieu Le
      6  * Copyright 2012      Matthew Endsley
      7  * Copyright 2020      Stephen Gregoratto
      8  * All rights reserved
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted providing that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
     23  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
     28  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #if 0
     33 __FBSDID("$FreeBSD: src/usr.bin/bsdiff/bsdiff/bsdiff.c,v 1.1 2005/08/06 01:59:05 cperciva Exp $");
     34 #endif
     35 
     36 #include "config.h"
     37 #include <sys/stat.h>
     38 
     39 #include <bzlib.h>
     40 #if HAVE_ERR
     41 #include <err.h>
     42 #endif
     43 #include <fcntl.h>
     44 #include <stdio.h>
     45 #include <stdlib.h>
     46 #include <string.h>
     47 #if HAVE_PLEDGE && HAVE_UNVEIL
     48 #include <unistd.h>
     49 #endif
     50 
     51 #include "bsdiff.h"
     52 
     53 static off_t
     54 bz_write(FILE *f, void *buf, size_t size)
     55 {
     56 	int bzerr;
     57 	BZFILE *bzf;
     58 	unsigned int out;
     59 
     60 	if ((bzf = BZ2_bzWriteOpen(&bzerr, f, 9, 0, 0)) == NULL)
     61 		errx(1, "BZ2_bzWriteOpen returned %d", bzerr);
     62 	BZ2_bzWrite(&bzerr, bzf, buf, size);
     63 	if (bzerr != BZ_OK)
     64 		errx(1, "BZ2_bzWrite returned %d", bzerr);
     65 	BZ2_bzWriteClose(&bzerr, bzf, 0, NULL, &out);
     66 	if (bzerr != BZ_OK)
     67 		errx(1, "BZ2_bzWriteClose returned %d", bzerr);
     68 
     69 	return (off_t)out;
     70 }
     71 
     72 int
     73 main(int argc, char **argv)
     74 {
     75 	uint8_t *old, *new;
     76 	off_t oldsize, newsize;
     77 	off_t *I, *V;
     78 	off_t scan, pos, len, filelen;
     79 	off_t lastscan, lastpos, lastoffset;
     80 	off_t oldscore, scsc;
     81 	off_t s, Sf, lenf, Sb, lenb;
     82 	off_t overlap, Ss, lens;
     83 	off_t i;
     84 	off_t dblen;
     85 	uint8_t *dbuf;
     86 	uint8_t ctrlbuf[24];
     87 	struct bsdiff_header header = { "BSDIFF40", 0, 0, 0 };
     88 	FILE *pf;
     89 
     90 	if (argc != 4)
     91 		usage();
     92 
     93 #if HAVE_PLEDGE && HAVE_UNVEIL
     94 	if (pledge("stdio rpath wpath cpath unveil", NULL) == -1)
     95 		err(1, "pledge");
     96 	if (unveil(argv[1], "r") == -1)
     97 		err(1, "unveil(%s, r)", argv[1]);
     98 	if (unveil(argv[2], "r") == -1)
     99 		err(1, "unveil(%s, r)", argv[2]);
    100 	if (unveil(argv[3], "wc") == -1)
    101 		err(1, "unveil(%s, wc)", argv[3]);
    102 	if (pledge("stdio rpath wpath cpath", NULL) == -1)
    103 		err(1, "pledge");
    104 #endif
    105 	old = readfile(argv[1], &oldsize, &(struct stat){ 0 });
    106 
    107 	if ((I = reallocarray(NULL, oldsize + 1, sizeof(off_t))) == NULL)
    108 		err(1, "reallocarray");
    109 	if ((V = reallocarray(NULL, oldsize + 1, sizeof(off_t))) == NULL)
    110 		err(1, "reallocarray");
    111 
    112 	qsufsort(I, V, old, oldsize);
    113 
    114 	free(V);
    115 
    116 	new = readfile(argv[2], &newsize, &(struct stat){ 0 });
    117 #if HAVE_PLEDGE && HAVE_UNVEIL
    118 	if (pledge("stdio wpath cpath", NULL) == -1)
    119 		err(1, "pledge");
    120 #endif
    121 	/* Create the patch file */
    122 	if ((pf = fopen(argv[3], "w")) == NULL)
    123 		err(1, "fopen(%s)", argv[3]);
    124 #if HAVE_PLEDGE && HAVE_UNVEIL
    125 	if (pledge("stdio", NULL) == -1)
    126 		err(1, "pledge");
    127 #endif
    128 
    129 	if ((dbuf = reallocarray(NULL, newsize + 1, 1)) == NULL)
    130 		err(1, "reallocarray");
    131 	filelen = 0;
    132 
    133 	/*
    134 	 * The structure of a bsdiff patch is a 32 byte plaintext header
    135 	 * and three bzip2'd data blocks. In more detail:
    136 	 *
    137 	 *  offset         size
    138 	 *	 0            8 "BSDIFF40"
    139 	 *	 8            8 sizeof(bzip2(ctrl block)): X
    140 	 *	16            8 sizeof(bzip2(diff block)): Y
    141 	 *	24            8 sizeof(newfile)
    142 	 *	32            X bzip2(ctrl block)
    143 	 *	32 + X        Y bzip2(diff block)
    144 	 *	32 + X + Y   ?? bzip2(extra block)
    145 	 *
    146 	 * The ctrl block is a set of triples (x, y, z) that tells the patch
    147 	 * program to:
    148 	 *
    149 	 * - Add x bytes from oldfile to x bytes from the diff block.
    150 	 * - Copy y bytes from the extra block.
    151 	 * - Seek forwards in oldfile by z bytes.
    152 	 */
    153 	header.new_file_len = newsize;
    154 	if (fwrite(&header, 1, sizeof(header), pf) != 32)
    155 		err(1, "fwrite(%s, %zu)", argv[3], sizeof(header));
    156 	filelen += 32;
    157 
    158 	/* Compute the differences, writing ctrl as we go */
    159 	scan = len = lenf = lenb = lastscan = pos = lastpos = lastoffset = 0;
    160 	while (scan < newsize) {
    161 		oldscore = 0;
    162 
    163 		/*
    164 		 * If we come across a large block of data that only differs by
    165 		 * less than 8 bytes, this loop takes a long time to go past it.
    166 		 * Track the number of times we're stuck in the block and break
    167 		 * out of it.
    168 		 */
    169 		int num_less_than_eight = 0;
    170 		off_t prev_len, prev_oldscore, prev_pos;
    171 		for (scsc = scan += len; scan < newsize; scan++) {
    172 			prev_len = len;
    173 			prev_oldscore = oldscore;
    174 			prev_pos = pos;
    175 
    176 			len = search(I, old, oldsize, new + scan, newsize - scan, 0, oldsize, &pos);
    177 
    178 			for (; scsc < scan + len; scsc++)
    179 				if (scsc + lastoffset < oldsize &&
    180 				    old[scsc + lastoffset] == new[scsc])
    181 					oldscore++;
    182 
    183 			if ((len == oldscore && len != 0) || len > oldscore + 8)
    184 				break;
    185 
    186 			if (scan + lastoffset < oldsize && old[scan + lastoffset] == new[scan])
    187 				oldscore--;
    188 
    189 			const off_t fuzz = 8;
    190 			if (prev_len - fuzz <= len && len <= prev_len &&
    191 			    prev_oldscore - fuzz <= oldscore &&
    192 			    oldscore <= prev_oldscore &&
    193 			    prev_pos <= pos && pos <= prev_pos + fuzz &&
    194 			    oldscore <= len && len <= oldscore + fuzz)
    195 				num_less_than_eight++;
    196 			else
    197 				num_less_than_eight = 0;
    198 			if (num_less_than_eight > 100)
    199 				break;
    200 		}
    201 
    202 		if (len != oldscore || scan == newsize) {
    203 			s = Sf = lenf = 0;
    204 			for (i = 0; lastscan + i < scan && lastpos + i < oldsize;) {
    205 				if (old[lastpos + i] == new[lastscan + i])
    206 					s++;
    207 				i++;
    208 				if (s * 2 - i > Sf * 2 - lenf) {
    209 					Sf = s;
    210 					lenf = i;
    211 				}
    212 			}
    213 
    214 			lenb = 0;
    215 			if (scan < newsize) {
    216 				s = Sb = 0;
    217 				for (i = 1; scan >= lastscan + i && pos >= i; i++) {
    218 					if (old[pos - i] == new[scan - i])
    219 						s++;
    220 					if (s * 2 - i > Sb * 2 - lenb) {
    221 						Sb = s;
    222 						lenb = i;
    223 					}
    224 				}
    225 			}
    226 
    227 			if (lastscan + lenf > scan - lenb) {
    228 				overlap = (lastscan + lenf) - (scan - lenb);
    229 				s = Ss = lens = 0;
    230 				for (i = 0; i < overlap; i++) {
    231 					if (new[lastscan + lenf - overlap + i] ==
    232 					    old[lastpos + lenf - overlap + i])
    233 						s++;
    234 					if (new[scan - lenb + i] == old[pos - lenb + i])
    235 						s--;
    236 					if (s > Ss) {
    237 						Ss = s;
    238 						lens = i + 1;
    239 					}
    240 				}
    241 
    242 				lenf += lens - overlap;
    243 				lenb -= lens;
    244 			}
    245 		}
    246 	}
    247 	/* Compute the size of the blocks and write them */
    248 	offtout(lenf, ctrlbuf);
    249 	offtout((scan - lenb) - (lastscan + lenf), ctrlbuf + 8);
    250 	offtout((pos - lenb) - (lastpos + lenf), ctrlbuf + 16);
    251 	filelen += bz_write(pf, ctrlbuf, sizeof(ctrlbuf));
    252 	header.ctrl_block_len = filelen - 32;
    253 
    254 	dblen = lenf;
    255 	for (i = 0; i < dblen; i++)
    256 		dbuf[i] = new[lastscan + i] - old[lastpos + i];
    257 	filelen += bz_write(pf, dbuf, dblen);
    258 	header.diff_block_len = filelen - (header.ctrl_block_len + 32);
    259 
    260 	dblen = (scan - lenb) - (lastscan + lenf);
    261 	for (i = 0; i < dblen; i++)
    262 		dbuf[i] = new[lastscan + lenf + i];
    263 	bz_write(pf, dbuf, dblen);
    264 
    265 	/* Seek to the beginning, write the header, and close the file */
    266 	if (fseeko(pf, 0, SEEK_SET))
    267 		err(1, "fseeko");
    268 	if (fwrite(&header, 1, sizeof(header), pf) != 32)
    269 		err(1, "fwrite(%s)", argv[3]);
    270 	if (fclose(pf))
    271 		err(1, "fclose");
    272 
    273 	/* Free the memory we used */
    274 	free(dbuf);
    275 	free(I);
    276 	free(old);
    277 	free(new);
    278 
    279 	return 0;
    280 }