rpm  5.4.10
rpmgrep.c
Go to the documentation of this file.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4 
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
7 directories.
8 
9  Copyright (c) 1997-2008 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15  * Redistributions of source code must retain the above copyright notice,
16  this list of conditions and the following disclaimer.
17 
18  * Redistributions in binary form must reproduce the above copyright
19  notice, this list of conditions and the following disclaimer in the
20  documentation and/or other materials provided with the distribution.
21 
22  * Neither the name of the University of Cambridge nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 #include "system.h"
41 
42 #define _MIRE_INTERNAL
43 #include <rpmio_internal.h> /* XXX fdGetFILE */
44 #include <rpmdir.h>
45 #include <poptIO.h>
46 
47 #include "debug.h"
48 
49 /*@access miRE @*/
50 
/* Local boolean type and its two values. */
51 typedef unsigned BOOL;
52 #define FALSE ((BOOL)0)
53 #define TRUE ((BOOL)1)
54 
/* Upper bound quoted in the "Too many patterns (max %d)" diagnostic. */
55 #define MAX_PATTERN_COUNT 100
56 
/*
 * One third of the scan buffer used by pcregrep() (which declares
 * char buffer[3*MBUFTHIRD]): BUFSIZ when that is larger than 8192,
 * otherwise 8192.
 */
57 #if BUFSIZ > 8192
58 #define MBUFTHIRD BUFSIZ
59 #else
60 #define MBUFTHIRD 8192
61 #endif
62 
/**
 * Write nmemb items of size bytes each to stream.
 * A short write is reported with perror("fwrite"); the caller is not told.
 *
 * @param ptr		data to write
 * @param size		size of one item
 * @param nmemb		number of items
 * @param stream	destination stream
 */
static inline void fwrite_check(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
    const size_t written = fwrite(ptr, size, nmemb, stream);

    if (written == nmemb)
        return;
    perror("fwrite");
}
68 
69 /*************************************************
70 * Global variables *
71 *************************************************/
72 
/* NOTE(review): no use of newline is visible in this part of the file. */
73 /*@unchecked@*/ /*@only@*/ /*@null@*/
74 static const char *newline = NULL;
75 
/*
 * color_string is emitted between ESC '[' and 'm' by pcregrep() in front of
 * the matched text when the COLOR flag is set.
 * NOTE(review): pattern_filenames and stdin_name are not used in the part
 * of the file visible here -- confirm their roles against the option table.
 */
76 /*@unchecked@*/ /*@only@*/ /*@null@*/
77 static const char *color_string = NULL;
78 /*@unchecked@*/ /*@only@*/ /*@null@*/
79 static ARGV_t pattern_filenames = NULL;
80 /*@unchecked@*/ /*@only@*/ /*@null@*/
81 static const char *stdin_name = NULL;
82 
/* NOTE(review): locale is not used in the part of the file visible here. */
83 /*@unchecked@*/ /*@only@*/ /*@null@*/
84 static const char *locale = NULL;
85 
/*
 * pattern_list is the array of compiled patterns and pattern_count the
 * number of entries filled in: compile_single_pattern() compiles into
 * pattern_list[pattern_count] and increments the count on success, and
 * pcregrep() tries entries 0..pattern_count-1 against every line.
 */
86 /*@unchecked@*/ /*@only@*/ /*@relnull@*/
87 static ARGV_t patterns = NULL;
88 /*@unchecked@*/ /*@only@*/ /*@relnull@*/
89 static miRE pattern_list = NULL;
90 /*@unchecked@*/
91 static int pattern_count = 0;
92 
/*
 * Exclude patterns: grep_or_recurse() passes excludeMire/nexcludes to
 * mireApply() for each path found while recursing and skips the path when
 * the result is >= 0.
 */
93 /*@unchecked@*/ /*@only@*/ /*@null@*/
94 static ARGV_t exclude_patterns = NULL;
95 /*@unchecked@*/ /*@only@*/ /*@relnull@*/
96 static miRE excludeMire = NULL;
97 /*@unchecked@*/
98 static int nexcludes = 0;
99 
/*
 * Include patterns: grep_or_recurse() passes includeMire/nincludes to
 * mireApply() for each path found while recursing and skips the path when
 * the result is < 0.
 */
100 /*@unchecked@*/ /*@only@*/ /*@null@*/
101 static ARGV_t include_patterns = NULL;
102 /*@unchecked@*/ /*@only@*/ /*@relnull@*/
103 static miRE includeMire = NULL;
104 /*@unchecked@*/
105 static int nincludes = 0;
106 
/*
 * Number of context lines pcregrep() prints after and before a matching
 * line. NOTE(review): both_context is not used in the visible code;
 * presumably it seeds the other two -- confirm.
 */
107 /*@unchecked@*/
108 static int after_context = 0;
109 /*@unchecked@*/
110 static int before_context = 0;
111 /*@unchecked@*/
112 static int both_context = 0;
113 
/* What grep_or_recurse() does with a directory: read it, skip it or recurse. */
116 /*@unchecked@*/
117 static enum dee_e dee_action = dee_READ;
118 
/*
 * What grep_or_recurse() does with a path that is neither a directory nor
 * a regular file: DEE_SKIP makes it return 1 without opening the path.
 */
120 enum DEE_e { DEE_READ=1, DEE_SKIP };
121 /*@unchecked@*/
122 static enum DEE_e DEE_action = DEE_READ;
123 
/* Running count of matching errors; pcregrep() exits once it exceeds 20. */
124 /*@unchecked@*/
125 static int error_count = 0;
126 
/* File name output mode, compared against the FN_* values by the grep code. */
133 /*@unchecked@*/
134 static enum FN_e filenames = FN_DEFAULT;
135 
/*
 * _GFB(n) builds a flag value: bit n together with the 0x40000000 marker bit.
 * GF_ISSET(FLAG) strips the marker bit from GREP_FLAGS_FLAG and tests whether
 * any of the remaining bits is set in grepFlags.
 * NOTE(review): the purpose of the 0x40000000 marker bit is not visible in
 * this part of the file -- confirm before changing it.
 */
136 #define _GFB(n) ((1U << (n)) | 0x40000000)
137 #define GF_ISSET(_FLAG) ((grepFlags & ((GREP_FLAGS_##_FLAG) & ~0x40000000)) != GREP_FLAGS_NONE)
138 
141 
142 /* XXX WATCHOUT: the next 3 bits are also used as an index, do not change!!! */
159 };
160 
161 /*@unchecked@*/
163 
164 #if defined(WITH_PCRE)
165 /*@unchecked@*/
167 #else
169 #endif
170 
/*
 * Operation statistics accumulators. grep_or_recurse() adds each file's
 * FDSTAT_READ statistics to grep_readops.
 * NOTE(review): grep_totalops is not used in the visible code; presumably
 * it covers the whole run -- confirm.
 */
171 /*@unchecked@*/
172 static struct rpmop_s grep_totalops;
173 /*@unchecked@*/
174 static struct rpmop_s grep_readops;
175 
/*
 * Text placed in front of a pattern before it is compiled, indexed by the
 * low three bits of grepFlags (see compile_single_pattern()). Reading the
 * entries: bit 0 adds "\b", bit 1 adds "^(?:" (and wins over bit 0),
 * bit 2 adds "\Q". Each entry pairs with the same index in suffix[].
 */
182 /*@unchecked@*/ /*@observer@*/
183 static const char *prefix[] = {
184  "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q"
185 };
186 
/*
 * Text appended after a pattern before it is compiled; same indexing as
 * prefix[], closing whatever the matching prefix entry opened.
 */
187 /*@unchecked@*/ /*@observer@*/
188 static const char *suffix[] = {
189  "", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$"
190 };
191 
/*
 * Mask applied to the first byte of a UTF-8 sequence to extract its data
 * bits, indexed by the number of additional bytes in the sequence.
 */
193 /*@unchecked@*/ /*@observer@*/
194 static const unsigned utf8_table3[] = {
195  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01
196 };
197 
/*
 * Number of additional bytes that follow a UTF-8 first byte, indexed by
 * the low six bits of that byte; only consulted for bytes >= 0xc0.
 */
198 /*@+charint@*/
199 /*@unchecked@*/ /*@observer@*/
200 static const char utf8_table4[] = {
201  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
203  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
204  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
205 };
206 /*@=charint@*/
207 
208 /*************************************************
209  * Find end of line.
210  *
211  * The length of the endline sequence that is found is set via lenptr.
212  * This may be zero at the very end of the file if there is no line-ending
213  * sequence there.
214  *
215  * @param p current position in line
216  * @param endptr end of available data
217  * @retval *lenptr length of the eol sequence
218  * @return pointer to the last byte of the line
219 */
220 /*@observer@*/
221 static const char *
222 end_of_line(const char *p, const char *endptr, /*@out@*/ size_t *lenptr)
223  /*@modifies *lenptr @*/
224 {
225  switch(_mireEL) {
226  default: /* Just in case */
227  case EL_LF:
228  while (p < endptr && *p != '\n') p++;
229  if (p < endptr) {
230  *lenptr = 1;
231  return p + 1;
232  }
233  *lenptr = 0;
234  return endptr;
235  /*@notreached@*/ break;
236 
237  case EL_CR:
238  while (p < endptr && *p != '\r') p++;
239  if (p < endptr) {
240  *lenptr = 1;
241  return p + 1;
242  }
243  *lenptr = 0;
244  return endptr;
245  /*@notreached@*/ break;
246 
247  case EL_CRLF:
248  for (;;) {
249  while (p < endptr && *p != '\r') p++;
250  if (++p >= endptr) {
251  *lenptr = 0;
252  return endptr;
253  }
254  if (*p == '\n') {
255  *lenptr = 2;
256  return p + 1;
257  }
258  }
259  /*@notreached@*/ break;
260 
261  case EL_ANYCRLF:
262  while (p < endptr) {
263  size_t extra = 0;
264  unsigned int c = (unsigned)*((unsigned char *)p);
265 
266  if (GF_ISSET(UTF8) && c >= 0xc0) {
267  size_t gcii, gcss;
268  extra = (size_t)utf8_table4[c & 0x3f]; /* No. of additional bytes */
269  gcss = 6*extra;
270  c = (c & utf8_table3[extra]) << gcss;
271  for (gcii = 1; gcii <= extra; gcii++) {
272  gcss -= 6;
273  c |= ((unsigned)p[gcii] & 0x3f) << gcss;
274  }
275  }
276 
277  p += 1 + extra;
278 
279  switch (c) {
280  case 0x0a: /* LF */
281  *lenptr = 1;
282  return p;
283  /*@notreached@*/ /*@switchbreak@*/ break;
284 
285  case 0x0d: /* CR */
286  if (p < endptr && (unsigned)*p == 0x0a) {
287  *lenptr = 2;
288  p++;
289  }
290  else *lenptr = 1;
291  return p;
292  /*@notreached@*/ /*@switchbreak@*/ break;
293 
294  default:
295  /*@switchbreak@*/ break;
296  }
297  } /* End of loop for ANYCRLF case */
298 
299  *lenptr = 0; /* Must have hit the end */
300  return endptr;
301  /*@notreached@*/ break;
302 
303  case EL_ANY:
304  while (p < endptr) {
305  size_t extra = 0;
306  unsigned int c = (unsigned)*((unsigned char *)p);
307 
308  if (GF_ISSET(UTF8) && c >= 0xc0) {
309  size_t gcii, gcss;
310  extra = (size_t)utf8_table4[c & 0x3f]; /* No. of additional bytes */
311  gcss = 6*extra;
312  c = (c & utf8_table3[extra]) << gcss;
313  for (gcii = 1; gcii <= extra; gcii++) {
314  gcss -= 6;
315  c |= ((unsigned)p[gcii] & 0x3f) << gcss;
316  }
317  }
318 
319  p += 1 + extra;
320 
321  switch (c) {
322  case 0x0a: /* LF */
323  case 0x0b: /* VT */
324  case 0x0c: /* FF */
325  *lenptr = 1;
326  return p;
327  /*@notreached@*/ /*@switchbreak@*/ break;
328 
329  case 0x0d: /* CR */
330  if (p < endptr && (unsigned)*p == 0x0a) {
331  *lenptr = 2;
332  p++;
333  }
334  else *lenptr = 1;
335  return p;
336  /*@notreached@*/ /*@switchbreak@*/ break;
337 
338  case 0x85: /* NEL */
339  *lenptr = GF_ISSET(UTF8) ? 2 : 1;
340  return p;
341  /*@notreached@*/ /*@switchbreak@*/ break;
342 
343  case 0x2028: /* LS */
344  case 0x2029: /* PS */
345  *lenptr = 3;
346  return p;
347  /*@notreached@*/ /*@switchbreak@*/ break;
348 
349  default:
350  /*@switchbreak@*/ break;
351  }
352  } /* End of loop for ANY case */
353 
354  *lenptr = 0; /* Must have hit the end */
355  return endptr;
356  /*@notreached@*/ break;
357  } /* End of overall switch */
358  /*@notreached@*/
359 }
360 
361 /*************************************************
362  * Find start of previous line
363  *
364  * This is called when looking back for before lines to print.
365  *
366  * @param p start of the subsequent line
367  * @param startptr start of available data
368  * @return pointer to the start of the previous line
369  */
370 /*@observer@*/
371 static const char *
372 previous_line(const char *p, const char *startptr)
373  /*@*/
374 {
375  switch (_mireEL) {
376  default: /* Just in case */
377  case EL_LF:
378  p--;
379  while (p > startptr && p[-1] != '\n') p--;
380  return p;
381  /*@notreached@*/ break;
382 
383  case EL_CR:
384  p--;
385  while (p > startptr && p[-1] != '\n') p--;
386  return p;
387  /*@notreached@*/ break;
388 
389  case EL_CRLF:
390  for (;;) {
391  p -= 2;
392  while (p > startptr && p[-1] != '\n') p--;
393  if (p <= startptr + 1 || p[-2] == '\r') return p;
394  }
395  /*@notreached@*/ return p; /* But control should never get here */
396  /*@notreached@*/ break;
397 
398  case EL_ANY:
399  case EL_ANYCRLF:
400  if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
401  if (GF_ISSET(UTF8)) while ((((unsigned)*p) & 0xc0) == 0x80) p--;
402 
403  while (p > startptr) {
404  const char *pp = p - 1;
405  unsigned int c;
406 
407  if (GF_ISSET(UTF8)) {
408  size_t extra = 0;
409  while ((((unsigned)*pp) & 0xc0) == 0x80) pp--;
410  c = (unsigned)*((unsigned char *)pp);
411  if (c >= 0xc0) {
412  size_t gcii, gcss;
413  extra = (size_t)utf8_table4[c & 0x3f]; /* No. of additional bytes */
414  gcss = 6*extra;
415  c = (c & utf8_table3[extra]) << gcss;
416  for (gcii = 1; gcii <= extra; gcii++) {
417  gcss -= 6;
418  c |= ((unsigned)pp[gcii] & 0x3f) << gcss;
419  }
420  }
421  } else
422  c = (unsigned)*((unsigned char *)pp);
423 
424  if (_mireEL == EL_ANYCRLF) {
425  switch (c) {
426  case 0x0a: /* LF */
427  case 0x0d: /* CR */
428  return p;
429  /*@notreached@*/ /*@switchbreak@*/ break;
430 
431  default:
432  /*@switchbreak@*/ break;
433  }
434  } else {
435  switch (c) {
436  case 0x0a: /* LF */
437  case 0x0b: /* VT */
438  case 0x0c: /* FF */
439  case 0x0d: /* CR */
440  case 0x85: /* NEL */
441  case 0x2028: /* LS */
442  case 0x2029: /* PS */
443  return p;
444  /*@notreached@*/ /*@switchbreak@*/ break;
445 
446  default:
447  /*@switchbreak@*/ break;
448  }
449  }
450 
451  p = pp; /* Back one character */
452  } /* End of loop for ANY case */
453 
454  return startptr; /* Hit start of data */
455  /*@notreached@*/ break;
456  } /* End of overall switch */
457  /*@notreached@*/
458 }
459 
460 /*************************************************
461  * Print the previous "after" lines
462  *
463  * This is called if we are about to lose said lines because of buffer filling,
464  * and at the end of the file. The data in the line is written using fwrite() so
465  * that a binary zero does not terminate it.
466  *
467  * @param lastmatchnumber the number of the last matching line, plus one
468  * @param lastmatchrestart where we restarted after the last match
469  * @param endptr end of available data
470  * @param printname filename for printing (or NULL)
471  */
472 static void do_after_lines(int lastmatchnumber, const char *lastmatchrestart,
473  const char *endptr, /*@null@*/ const char *printname)
474  /*@globals fileSystem @*/
475  /*@modifies fileSystem @*/
476 {
477  int count = 0;
478  while (lastmatchrestart < endptr && count++ < after_context) {
479  const char *pp = lastmatchrestart;
480  size_t ellength;
481  if (printname != NULL) fprintf(stdout, "%s-", printname);
482  if (GF_ISSET(LNUMBER)) fprintf(stdout, "%d-", lastmatchnumber++);
483  pp = end_of_line(pp, endptr, &ellength);
484  fwrite_check(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
485  lastmatchrestart = pp;
486  }
487 }
488 
489 /*************************************************
490  * Grep an individual file
491  *
492  * This is called from grep_or_recurse() below. It uses a buffer that is three
493  * times the value of MBUFTHIRD. The matching point is never allowed to stray
494  * into the top third of the buffer, thus keeping more of the file available
495  * for context printing or for multiline scanning. For large files, the pointer
496  * will be in the middle third most of the time, so the bottom third is
497  * available for "before" context printing.
498  *
499  * @param handle the fopen'd FILE stream for a normal file
500  * the gzFile pointer when reading is via libz
501  * the BZFILE pointer when reading is via libbz2
502  * @param frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
503  * @param printname the file name if it is to be printed for each match
504  * or NULL if the file name is not to be printed
505  * it cannot be NULL if filenames[_nomatch]_only is set
506  * @return 0: at least one match, 1: no match, 2: read error (bz2)
507  */
508 static int
509 pcregrep(FD_t fd, const char *printname)
510  /*@globals error_count, pattern_list, fileSystem @*/
511  /*@modifies fd, error_count, pattern_list, fileSystem @*/
512 {
513  int rc = 1;
514  int linenumber = 1;
515  int lastmatchnumber = 0;
516  int count = 0;
517  int filepos = 0;
518  int offsets[99];
519  const char *lastmatchrestart = NULL;
520  char buffer[3*MBUFTHIRD];
521  const char *ptr = buffer;
522  const char *endptr;
523  size_t bufflength;
524  static BOOL hyphenpending = FALSE;
525  BOOL endhyphenpending = FALSE;
526  BOOL invert = (GF_ISSET(INVERT) ? TRUE : FALSE);
527 
528  bufflength = Fread(buffer, 1, 3*MBUFTHIRD, fd);
529  endptr = buffer + bufflength;
530 
531  /*
532  * Loop while the current pointer is not at the end of the file. For large
533  * files, endptr will be at the end of the buffer when we are in the middle
534  * of the file, but ptr will never get there, because as soon as it gets
535  * over 2/3 of the way, the buffer is shifted left and re-filled.
536  */
537  while (ptr < endptr) {
538  int i;
539  int mrc = 0;
540  BOOL match = FALSE;
541  const char *matchptr = ptr;
542  const char *t = ptr;
543  size_t length, linelength;
544  size_t endlinelength;
545 
546  /*
547  * At this point, ptr is at the start of a line. We need to find the
548  * length of the subject string to pass to mireRegexec(). In multiline
549  * mode, it is the length remainder of the data in the buffer.
550  * Otherwise, it is the length of the next line. After matching, we
551  * always advance by the length of the next line. In multiline mode
552  * the PCRE_FIRSTLINE option is used for compiling, so that any match
553  * is constrained to be in the first line.
554  */
555  t = end_of_line(t, endptr, &endlinelength);
556  linelength = t - ptr - endlinelength;
557  length = GF_ISSET(MULTILINE) ? (size_t)(endptr - ptr) : linelength;
558 
559  /*
560  * We come back here after a match when the -o,--only-matching option
561  * is set, in order to find any further matches in the same line.
562  */
563 ONLY_MATCHING_RESTART:
564 
565  /*
566  * Run through all the patterns until one matches. Note that we don't
567  * include the final newline in the subject string.
568  */
569  for (i = 0; i < pattern_count; i++) {
570  miRE mire = pattern_list + i;
571  int xx;
572 
573 /*@-onlytrans@*/
574  /* Set sub-string offset array. */
575  xx = mireSetEOptions(mire, offsets, 99);
576 
577  /* XXX WATCHOUT: mireRegexec w length=0 does strlen(matchptr)! */
578  mrc = (length > 0 ? mireRegexec(mire, matchptr, length) : -1);
579 /*@=onlytrans@*/
580  if (mrc >= 0) { match = TRUE; /*@innerbreak@*/ break; }
581  if (mrc < -1) { /* XXX -1 == NOMATCH, otherwise error. */
582  fprintf(stderr, _("%s: pcre_exec() error %d while matching "), __progname, mrc);
583  if (pattern_count > 1) fprintf(stderr, _("pattern number %d to "), i+1);
584  fprintf(stderr, _("this line:\n"));
585  fwrite_check(matchptr, 1, linelength, stderr); /* In case binary zero included */
586  fprintf(stderr, "\n");
587 #if defined(PCRE_ERROR_MATCHLIMIT)
588  if (error_count == 0 &&
589  (mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
590  {
591  fprintf(stderr,
592  _("%s: error %d means that a resource limit was exceeded\n"),
593  __progname, mrc);
594  fprintf(stderr,
595  _("%s: check your regex for nested unlimited loops\n"),
596  __progname);
597  }
598 #endif
599  if (error_count++ > 20) {
600  fprintf(stderr, _("%s: too many errors - abandoned\n"),
601  __progname);
602 /*@-exitarg@*/
603  exit(2);
604 /*@=exitarg@*/
605  }
606  match = invert; /* No more matching; don't show the line again */
607  /*@innerbreak@*/ break;
608  }
609  }
610 
611  /* If it's a match or a not-match (as required), do what's wanted. */
612  if (match != invert) {
613  BOOL hyphenprinted = FALSE;
614 
615  /* We've failed if we want a file that doesn't have any matches. */
616  if (filenames == FN_NOMATCH_ONLY) {
617  rc = 1;
618  goto exit;
619  }
620 
621  /* Just count if just counting is wanted. */
622  if (GF_ISSET(COUNT)) count++;
623 
624  /*
625  * If all we want is a file name, there is no need to scan any
626  * more lines in the file.
627  */
628  else if (filenames == FN_ONLY) {
629  if (printname != NULL) fprintf(stdout, "%s\n", printname);
630  rc = 0;
631  goto exit;
632  }
633 
634  /* Likewise, if all we want is a yes/no answer. */
635  else if (GF_ISSET(QUIET)) {
636  rc = 0;
637  goto exit;
638  }
639 
640  /*
641  * The --only-matching option prints just the substring that
642  * matched, and the --file-offsets and --line-offsets options
643  * output offsets for the matching substring (they both force
644  * --only-matching). None of these options prints any context.
645  * Afterwards, adjust the start and length, and then jump back
646  * to look for further matches in the same line. If we are in
647  * invert mode, however, nothing is printed - this could be
648  * still useful because the return code is set.
649  */
650  else if (GF_ISSET(ONLY_MATCHING)) {
651  if (!GF_ISSET(INVERT)) {
652  if (printname != NULL) fprintf(stdout, "%s:", printname);
653  if (GF_ISSET(LNUMBER)) fprintf(stdout, "%d:", linenumber);
654  if (GF_ISSET(LOFFSETS))
655  fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
656  offsets[1] - offsets[0]);
657  else if (GF_ISSET(FOFFSETS))
658  fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
659  offsets[1] - offsets[0]);
660  else
661  fwrite_check(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
662  fprintf(stdout, "\n");
663  matchptr += offsets[1];
664  length -= offsets[1];
665  match = FALSE;
666  goto ONLY_MATCHING_RESTART;
667  }
668  }
669 
670  /*
671  * This is the default case when none of the above options is set.
672  * We print the matching lines(s), possibly preceded and/or
673  * followed by other lines of context.
674  */
675  else {
676  /*
677  * See if there is a requirement to print some "after" lines
678  * from a previous match. We never print any overlaps.
679  */
680  if (after_context > 0
681  && lastmatchnumber > 0 && lastmatchrestart != NULL)
682  {
683  size_t ellength;
684  int linecount = 0;
685  const char *p = lastmatchrestart;
686 
687  while (p < ptr && linecount < after_context) {
688  p = end_of_line(p, ptr, &ellength);
689  linecount++;
690  }
691 
692  /*
693  * It is important to advance lastmatchrestart during this
694  * printing so that it interacts correctly with any
695  * "before" printing below. Print each line's data using
696  * fwrite() in case there are binary zeroes.
697  */
698  while (lastmatchrestart < p) {
699  const char *pp = lastmatchrestart;
700  if (printname != NULL) fprintf(stdout, "%s-", printname);
701  if (GF_ISSET(LNUMBER)) fprintf(stdout, "%d-", lastmatchnumber++);
702  pp = end_of_line(pp, endptr, &ellength);
703  fwrite_check(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
704  lastmatchrestart = pp;
705  }
706  if (lastmatchrestart != ptr) hyphenpending = TRUE;
707  }
708 
709  /* If there were non-contiguous lines printed above, insert hyphens. */
710  if (hyphenpending) {
711  fprintf(stdout, "--\n");
712  hyphenpending = FALSE;
713  hyphenprinted = TRUE;
714  }
715 
716  /*
717  * See if there is a requirement to print some "before" lines
718  * for this match. Again, don't print overlaps.
719  */
720  if (before_context > 0) {
721  int linecount = 0;
722  const char *p = ptr;
723 
724  while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
725  linecount < before_context)
726  {
727  linecount++;
728  p = previous_line(p, buffer);
729  }
730 
731  if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
732  fprintf(stdout, "--\n");
733 
734  while (p < ptr) {
735  size_t ellength;
736  const char *pp = p;
737  if (printname != NULL) fprintf(stdout, "%s-", printname);
738  if (GF_ISSET(LNUMBER)) fprintf(stdout, "%d-", linenumber - linecount--);
739  pp = end_of_line(pp, endptr, &ellength);
740  fwrite_check(p, 1, pp - p, stdout);
741  p = pp;
742  }
743  }
744 
745  /*
746  * Now print the matching line(s); ensure we set hyphenpending
747  * at the end of the file if any context lines are being output.
748  */
749  if (after_context > 0 || before_context > 0)
750  endhyphenpending = TRUE;
751 
752  if (printname != NULL) fprintf(stdout, "%s:", printname);
753  if (GF_ISSET(LNUMBER)) fprintf(stdout, "%d:", linenumber);
754 
755  /*
756  * In multiline mode, we want to print to the end of the line
757  * in which the end of the matched string is found, so we
758  * adjust linelength and the line number appropriately, but
759  * only when there actually was a match (invert not set).
760  * Because the PCRE_FIRSTLINE option is set, the start of
761  * the match will always be before the first newline sequence.
762  * */
763  if (GF_ISSET(MULTILINE)) {
764  size_t ellength;
765  const char *endmatch = ptr;
766  if (!GF_ISSET(INVERT)) {
767  endmatch += offsets[1];
768  t = ptr;
769  while (t < endmatch) {
770  t = end_of_line(t, endptr, &ellength);
771  if (t <= endmatch) linenumber++; else /*@innerbreak@*/ break;
772  }
773  }
774  endmatch = end_of_line(endmatch, endptr, &ellength);
775  linelength = endmatch - ptr - ellength;
776  }
777  /*
778  * NOTE: Use only fwrite() to output the data line, so that
779  * binary zeroes are treated as just another data character.
780  */
781 
782  /* We have to split the line(s) up if coloring. */
783  if (GF_ISSET(COLOR) && color_string != NULL) {
784  fwrite_check(ptr, 1, offsets[0], stdout);
785  fprintf(stdout, "%c[%sm", 0x1b, color_string);
786  fwrite_check(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
787  fprintf(stdout, "%c[00m", 0x1b);
788  fwrite_check(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
789  stdout);
790  }
791  else fwrite_check(ptr, 1, linelength + endlinelength, stdout);
792  }
793 
794  /* End of doing what has to be done for a match */
795  rc = 0; /* Had some success */
796 
797  /*
798  * Remember where the last match happened for after_context.
799  * We remember where we are about to restart, and that line's
800  * number.
801  */
802  lastmatchrestart = ptr + linelength + endlinelength;
803  lastmatchnumber = linenumber + 1;
804  }
805 
806  /*
807  * For a match in multiline inverted mode (which of course did not
808  * cause anything to be printed), we have to move on to the end of
809  * the match before proceeding.
810  */
811  if (GF_ISSET(MULTILINE) && GF_ISSET(INVERT) && match) {
812  size_t ellength;
813  const char *endmatch = ptr + offsets[1];
814  t = ptr;
815  while (t < endmatch) {
816  t = end_of_line(t, endptr, &ellength);
817  if (t <= endmatch) linenumber++; else /*@innerbreak@*/ break;
818  }
819  endmatch = end_of_line(endmatch, endptr, &ellength);
820  linelength = endmatch - ptr - ellength;
821  }
822 
823  /*
824  * Advance to after the newline and increment the line number. The
825  * file offset to the current line is maintained in filepos.
826  */
827  ptr += linelength + endlinelength;
828  filepos += linelength + endlinelength;
829  linenumber++;
830 
831  /*
832  * If we haven't yet reached the end of the file (the buffer is full),
833  * and the current point is in the top 1/3 of the buffer, slide the
834  * buffer down by 1/3 and refill it. Before we do this, if some
835  * unprinted "after" lines are about to be lost, print them.
836  */
837  if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD) {
838  if (after_context > 0 &&
839  lastmatchnumber > 0 && lastmatchrestart != NULL &&
840  lastmatchrestart < buffer + MBUFTHIRD)
841  {
842  if (after_context > 0
843  && lastmatchnumber > 0 && lastmatchrestart != NULL)
844  {
845  do_after_lines(lastmatchnumber, lastmatchrestart,
846  endptr, printname);
847  hyphenpending = TRUE;
848  }
849  lastmatchnumber = 0;
850  }
851 
852  /* Now do the shuffle */
853 
854 /*@-modobserver@*/ /* XXX buffer <=> t aliasing */
855  memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
856  ptr -= MBUFTHIRD;
857 
858  bufflength = 2*MBUFTHIRD;
859  bufflength += Fread(buffer + bufflength, 1, MBUFTHIRD, fd);
860  endptr = buffer + bufflength;
861 /*@=modobserver@*/
862 
863  /* Adjust any last match point */
864  if (lastmatchnumber > 0 && lastmatchrestart != NULL)
865  lastmatchrestart -= MBUFTHIRD;
866  }
867  } /* Loop through the whole file */
868 
869  /*
870  * End of file; print final "after" lines if wanted; do_after_lines sets
871  * hyphenpending if it prints something.
872  */
873  if (!GF_ISSET(ONLY_MATCHING) && !GF_ISSET(COUNT)) {
874  if (after_context > 0
875  && lastmatchnumber > 0 && lastmatchrestart != NULL)
876  {
877  do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
878  hyphenpending = TRUE;
879  }
880  hyphenpending |= endhyphenpending;
881  }
882 
883  /*
884  * Print the file name if we are looking for those without matches and
885  * there were none. If we found a match, we won't have got this far.
886  */
887  if (filenames == FN_NOMATCH_ONLY) {
888  if (printname != NULL) fprintf(stdout, "%s\n", printname);
889  rc = 0;
890  goto exit;
891  }
892 
893  /* Print the match count if wanted */
894  if (GF_ISSET(COUNT)) {
895  if (printname != NULL) fprintf(stdout, "%s:", printname);
896  fprintf(stdout, "%d\n", count);
897  }
898 
899 exit:
900  return rc;
901 }
902 
/**
 * Check whether a file name ends with the given suffix.
 * The name has to be strictly longer than the suffix, so a name that
 * consists of the suffix alone does not qualify.
 *
 * @param fn		file name
 * @param suffix	suffix to look for
 * @return		1 if fn ends with suffix, otherwise 0
 */
static int chkSuffix(const char * fn, const char * suffix)
        /*@*/
{
    const size_t fnLen = strlen(fn);
    const size_t sufLen = strlen(suffix);

    if (fnLen <= sufLen)
        return 0;
    return (strcmp(fn + (fnLen - sufLen), suffix) == 0);
}
916 
917 /*************************************************
918  * Grep a file or recurse into a directory.
919  *
920  * Given a path name, if it's a directory, scan all the files if we are
921  * recursing; if it's a file, grep it.
922  *
923  * @param pathname the path to investigate
924  * @param dir_recurse TRUE if recursing is wanted (-r or -drecurse)
925  * @param only_one_at_top TRUE if the path is the only one at toplevel
926  * @return 0: at least one match, 1: no match, 2: read error (bz2)
927  *
928  * @note file opening failures are suppressed if "silent" is set.
929  */
930 static int
931 grep_or_recurse(const char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
932  /*@globals h_errno, fileSystem, internalState @*/
933  /*@modifies h_errno, fileSystem, internalState @*/
934 {
935  struct stat sb, *st = &sb;
936  int rc = 1;
937  size_t pathlen;
938  FD_t fd = NULL;
939  const char * fmode = "r.ufdio";
940  int xx;
941 
942  /* If the file name is "-" we scan stdin */
943  if (!strcmp(pathname, "-")) {
944  fd = fdDup(STDIN_FILENO);
945  goto openthestream;
946  }
947 
948  if ((xx = Stat(pathname, st)) != 0)
949  goto openthestream; /* XXX exit with Strerror(3) message. */
950 
951  /*
952  * If the file is a directory, skip if skipping or if we are recursing,
953  * scan each file within it, subject to any include or exclude patterns
954  * that were set. The scanning code is localized so it can be made
955  * system-specific.
956  */
957  if (S_ISDIR(st->st_mode))
958  switch (dee_action) {
959  case dee_READ:
960  break;
961  case dee_SKIP:
962  rc = 1;
963  goto exit;
964  /*@notreached@*/ break;
965  case dee_RECURSE:
966  { char buffer[1024];
967  DIR *dir = Opendir(pathname);
968  struct dirent *dp;
969 
970  if (dir == NULL) {
971  if (!GF_ISSET(SILENT))
972  fprintf(stderr, _("%s: Failed to open directory %s: %s\n"),
973  __progname, pathname, strerror(errno));
974  rc = 2;
975  goto exit;
976  }
977 
978  while ((dp = Readdir(dir)) != NULL) {
979  char sep = '/';
980 
981  if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))
982  continue;
983 
984  xx = snprintf(buffer, sizeof(buffer), "%.512s%c%.128s",
985  pathname, sep, dp->d_name);
986  buffer[sizeof(buffer)-1] = '\0';
987 
988 /*@-onlytrans@*/
989  if (mireApply(excludeMire, nexcludes, buffer, 0, -1) >= 0)
990  continue;
991 
992  if (mireApply(includeMire, nincludes, buffer, 0, +1) < 0)
993  continue;
994 /*@=onlytrans@*/
995 
996  xx = grep_or_recurse(buffer, dir_recurse, FALSE);
997  if (xx > 1) rc = xx;
998  else if (xx == 0 && rc == 1) rc = 0;
999  }
1000  xx = Closedir(dir);
1001  goto exit;
1002  } /*@notreached@*/ break;
1003  }
1004 
1005  /*
1006  * If the file is not a directory and not a regular file, skip it if
1007  * that's been requested.
1008  */
1009  else if (!S_ISREG(st->st_mode) && DEE_action == DEE_SKIP) {
1010  rc = 1;
1011  goto exit;
1012  }
1013 
1014  /*
1015  * Control reaches here if we have a regular file, or if we have a
1016  * directory and recursion or skipping was not requested, or if we have
1017  * anything else and skipping was not requested. The scan proceeds.
1018  * If this is the first and only argument at top level, we don't show
1019  * the file name, unless we are only showing the file name, or the
1020  * filename was forced (-H).
1021  */
1022  pathlen = strlen(pathname);
1023 
1024  /* Identify how to Fopen the file from the suffix. */
1025  if (chkSuffix(pathname, ".gz"))
1026  fmode = "r.gzdio"; /* Open with zlib decompression. */
1027  else if (chkSuffix(pathname, ".bz2"))
1028  fmode = "r.bzdio"; /* Open with bzip2 decompression. */
1029  else if (chkSuffix(pathname, ".lzma"))
1030  fmode = "r.lzdio"; /* Open with lzma decompression. */
1031  else if (chkSuffix(pathname, ".xz"))
1032  fmode = "r.xzdio"; /* Open with xz decompression. */
1033  else
1034  fmode = "r.ufdio";
1035 
1036  /* Open the stream. */
1037  fd = Fopen(pathname, fmode);
1038 openthestream:
1039  if (fd == NULL || Ferror(fd)) {
1040  if (!GF_ISSET(SILENT))
1041  fprintf(stderr, _("%s: Failed to open %s: %s\n"),
1042  __progname, pathname, Fstrerror(fd));
1043  rc = 2;
1044  goto exit;
1045  }
1046 
1047  /* Now grep the file */
1048  rc = pcregrep(fd, (filenames > FN_DEFAULT ||
1049  (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1050 
1051  if (fd != NULL)
1052  (void) rpmswAdd(&grep_readops, fdstat_op(fd, FDSTAT_READ));
1053 
1054 exit:
1055  if (fd != NULL)
1056  xx = Fclose(fd);
1057  return rc; /* Pass back the yield from pcregrep(). */
1058 }
1059 
1060 /*************************************************
1061  * Compile a single pattern.
1062  *
1063  * When the -F option has been used, this is called for each substring.
1064  * Otherwise it's called for each supplied pattern.
1065  *
1066  * @param pattern the pattern string
1067  * @param filename the file name (NULL for a command-line pattern)
1068  * @param count pattern index (0 is single pattern)
1069  * @return TRUE on success, FALSE after an error
1070  */
1071 static BOOL
1072 compile_single_pattern(const char *pattern,
1073  /*@null@*/ const char *filename, int count)
1074  /*@globals pattern_list, pattern_count, fileSystem @*/
1075  /*@modifies pattern_list, pattern_count, fileSystem @*/
1076 {
1077  miRE mire;
1078  char buffer[MBUFTHIRD + 16];
1079  int xx;
1080 
1082  fprintf(stderr, _("%s: Too many patterns (max %d)\n"), __progname,
1084  return FALSE;
1085  }
1086 
1087  sprintf(buffer, "%s%.*s%s", prefix[(int)(grepFlags & 0x7)],
1088  MBUFTHIRD, pattern, suffix[(int)(grepFlags & 0x7)]);
1089 
1090  mire = pattern_list + pattern_count;
1091 /*@-onlytrans@*/
1092  /* XXX initialize mire->{mode,tag,options,table}. */
1093  xx = mireSetCOptions(mire, grepMode, 0, 0, _mirePCREtables);
1094 
1095  if (!mireRegcomp(mire, buffer)) {
1096  pattern_count++;
1097  return TRUE;
1098  }
1099 /*@=onlytrans@*/
1100  /* Handle compile errors */
1101  mire->erroff -= (int)strlen(prefix[(int)(grepFlags & 0x7)]);
1102  if (mire->erroff < 0)
1103  mire->erroff = 0;
1104  if (mire->erroff > (int)strlen(pattern))
1105  mire->erroff = (int)strlen(pattern);
1106 
1107  fprintf(stderr, _("%s: Error in"), __progname);
1108  if (filename == NULL)
1109  fprintf(stderr, _(" command-line %d"), count);
1110  else
1111  fprintf(stderr, _(" file:line %s:%d"), filename, count);
1112  fprintf(stderr, _(" regex at offset %d: %s\n"), mire->erroff, mire->errmsg);
1113  return FALSE;
1114 }
1115 
1116 /*************************************************
1117  * Compile one supplied pattern.
1118  *
1119  * When the -F option has been used, each string may be a list of strings,
1120  * separated by line breaks. They will be matched literally.
1121  *
1122  * @param pattern the pattern string
1123  * @param filename the file name, or NULL for a command-line pattern
1124  * @param count pattern index (0 is single pattern)
1125  * @return TRUE on success, FALSE after an error
1126  */
1127 static BOOL
1128 compile_pattern(const char *pattern, /*@null@*/ const char *filename, int count)
1129  /*@globals fileSystem @*/
1130  /*@modifies fileSystem @*/
1131 {
1132  if (GF_ISSET(FIXED_STRINGS) != 0) {
1133  const char *eop = pattern + strlen(pattern);
1134  char buffer[MBUFTHIRD];
1135  for(;;) {
1136  size_t ellength;
1137  const char *p = end_of_line(pattern, eop, &ellength);
1138  if (ellength == 0)
1139  return compile_single_pattern(pattern, filename, count);
1140  sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1141  pattern = p;
1142  if (!compile_single_pattern(buffer, filename, count))
1143  return FALSE;
1144  }
1145  }
1146  else return compile_single_pattern(pattern, filename, count);
1147 }
1148 
1154 static int mireLoadPatternFiles(/*@null@*/ ARGV_t files)
1155  /*@globals h_errno, fileSystem, internalState @*/
1156  /*@modifies h_errno, fileSystem, internalState @*/
1157 {
1158  const char *fn;
1159  int rc = -1; /* assume failure */
1160 
1161  if (files != NULL) /* note rc=0 return with no files to load. */
1162  while ((fn = *files++) != NULL) {
1163  char buffer[MBUFTHIRD];
1164  int linenumber;
1165  FD_t fd = NULL;
1166  FILE *fp;
1167 
1168  if (strcmp(fn, "-") == 0) {
1169  fd = NULL;
1170  fp = stdin;
1171  fn = stdin_name; /* XXX use the stdin display name */
1172  } else {
1173  /* XXX .fpio is needed because of fgets(3) usage. */
1174  fd = Fopen(fn, "r.fpio");
1175  if (fd == NULL || Ferror(fd) || (fp = fdGetFILE(fd)) == NULL) {
1176  fprintf(stderr, _("%s: Failed to open %s: %s\n"),
1177  __progname, fn, Fstrerror(fd));
1178  if (fd != NULL) (void) Fclose(fd);
1179  fd = NULL;
1180  fp = NULL;
1181  goto exit;
1182  }
1183  }
1184 
1185  linenumber = 0;
1186  while (fgets(buffer, MBUFTHIRD, fp) != NULL) {
1187  char *se = buffer + (int)strlen(buffer);
1188  while (se > buffer && xisspace((int)se[-1]))
1189  se--;
1190  *se = '\0';
1191  linenumber++;
1192  /* Skip blank lines */
1193  if (buffer[0] == '\0') /*@innercontinue@*/ continue;
1194  if (!compile_pattern(buffer, fn, linenumber))
1195  goto exit;
1196  }
1197 
1198  if (fd != NULL) {
1199  (void) rpmswAdd(&grep_readops, fdstat_op(fd, FDSTAT_READ));
1200  (void) Fclose(fd);
1201  fd = NULL;
1202  }
1203  }
1204  rc = 0;
1205 
1206 exit:
1207  return rc;
1208 }
1209 
1210 /* Options without a single-letter equivalent get a negative value. This can be
1211 used to identify them. (NOTE: in the popt table below, such options are given '\0' as their short name instead.) */
1212 
1215 static void grepArgCallback(poptContext con,
1216  /*@unused@*/ enum poptCallbackReason reason,
1217  const struct poptOption * opt, const char * arg,
1218  /*@unused@*/ void * data)
1219  /*@globals color_string, dee_action, DEE_action, grepFlags, fileSystem @*/
1220  /*@modifies color_string, dee_action, DEE_action, grepFlags, fileSystem @*/
1221 {
1222  /* XXX avoid accidental collisions with POPT_BIT_SET for flags */
1223  if (opt->arg == NULL)
1224  switch (opt->val) {
1225 
1226  case 'd':
1227  if (!strcmp(arg, "read")) dee_action = dee_READ;
1228  else if (!strcmp(arg, "recurse")) dee_action = dee_RECURSE;
1229  else if (!strcmp(arg, "skip")) dee_action = dee_SKIP;
1230  else {
1231  fprintf(stderr, _("%s: Invalid value \"%s\" for -d\n"),
1232  __progname, arg);
1233  /*@-exitarg@*/ exit(2); /*@=exitarg@*/
1234  /*@notreached@*/
1235  }
1236  break;
1237  case 'D':
1238  if (!strcmp(arg, "read")) DEE_action = DEE_READ;
1239  else if (!strcmp(arg, "skip")) DEE_action = DEE_SKIP;
1240  else {
1241  fprintf(stderr, _("%s: Invalid value \"%s\" for -D\n"),
1242  __progname, arg);
1243  /*@-exitarg@*/ exit(2); /*@=exitarg@*/
1244  /*@notreached@*/
1245  }
1246  break;
1247  case 'C':
1248  if (!strcmp(arg, "never"))
1250  else if (!strcmp(arg, "always"))
1252  else if (!strcmp(arg, "auto")) {
1253  if (isatty(fileno(stdout)))
1255  else
1257  } else {
1258  fprintf(stderr, _("%s: Unknown color setting \"%s\"\n"),
1259  __progname, arg);
1260  /*@-exitarg@*/ exit(2); /*@=exitarg@*/
1261  /*@notreached@*/
1262  }
1264  if (GF_ISSET(COLOR)) {
1265  char *cs = getenv("PCREGREP_COLOUR");
1266  if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1267  color_string = xstrdup(cs != NULL ? cs : "1;31");
1268  }
1269  break;
1270 
1271  case 'V':
1272 #if defined(WITH_PCRE)
1273 /*@-evalorderuncon -moduncon @*/
1274  fprintf(stderr, _("%s %s (PCRE version %s)\n"), __progname, VERSION, pcre_version());
1275 /*@=evalorderuncon =moduncon @*/
1276 #else
1277  fprintf(stderr, _("%s %s (without PCRE)\n"), __progname, VERSION);
1278 #endif
1279  exit(0);
1280  /*@notreached@*/ break;
1281  default:
1282  fprintf(stderr, _("%s: Unknown option -%c\n"), __progname, opt->val);
1283  poptPrintUsage(con, stderr, 0);
1284  /*@-exitarg@*/ exit(2); /*@=exitarg@*/
1285  /*@notreached@*/ break;
1286  }
1287 }
1288 
/*
 * Command line option table for rpmgrep.
 *
 * Most entries store directly into a variable (POPT_ARG_INT, POPT_ARG_STRING,
 * POPT_ARG_ARGV, POPT_ARG_VAL) or set a bit in grepFlags (POPT_BIT_SET).
 * Entries whose arg pointer is NULL carry a letter in val and are handled by
 * grepArgCallback(), which switches on that letter ('C', 'D', 'd', 'V').
 */
1291 /*@+enumint@*/
1292 /*@unchecked@*/
1293 static struct poptOption optionsTable[] = {
1294 /*@-type@*/ /* FIX: cast? */
/* Install grepArgCallback for the entries of this table. */
1295  { NULL, '\0', POPT_ARG_CALLBACK | POPT_CBFLAG_INC_DATA | POPT_CBFLAG_CONTINUE,
1296  grepArgCallback, 0, NULL, NULL },
1297 /*@=type@*/
1298 
1299  { "after-context", 'A', POPT_ARG_INT, &after_context, 0,
1300  N_("set number of following context lines"), N_("=number") },
1301  { "before-context", 'B', POPT_ARG_INT, &before_context, 0,
1302  N_("set number of prior context lines"), N_("=number") },
/* The short option -C stores a line count here; the 'C' val used by --color below is a separate, callback-only code. */
1303  { "context", 'C', POPT_ARG_INT, &both_context, 0,
1304  N_("set number of context lines, before & after"), N_("=number") },
1305  { "count", 'c', POPT_BIT_SET, &grepFlags, GREP_FLAGS_COUNT,
1306  N_("print only a count of matching lines per FILE"), NULL },
/* --color and --colour have no short name and a NULL arg, so grepArgCallback() sees them with val 'C'. */
1307  { "color", '\0', POPT_ARG_STRING, NULL, (int)'C',
1308  N_("matched text color option (auto|always|never)"), N_("=option") },
1309  { "colour", '\0', POPT_ARG_STRING|POPT_ARGFLAG_DOC_HIDDEN, NULL, (int)'C',
1310  N_("matched text colour option (auto|always|never)"), N_("=option") },
1311 /* XXX HACK: there is a shortName option conflict with -D,--define */
1312  { "devices", 'D', POPT_ARG_STRING, NULL, (int)'D',
1313  N_("device, FIFO, or socket action (read|skip)"), N_("=action") },
1314  { "directories", 'd', POPT_ARG_STRING, NULL, (int)'d',
1315  N_("directory action (read|skip|recurse)"), N_("=action") },
1316  { "regex", 'e', POPT_ARG_ARGV, &patterns, 0,
1317  N_("specify pattern (may be used more than once)"), N_("(p)") },
/* NOTE(review): this long name uses an underscore while every other long name uses hyphens -- confirm whether "fixed-strings" was intended. */
1318  { "fixed_strings", 'F', POPT_BIT_SET, &grepFlags, GREP_FLAGS_FIXED_STRINGS,
1319  N_("patterns are sets of newline-separated strings"), NULL },
1320  { "file", 'f', POPT_ARG_ARGV, &pattern_filenames, 0,
1321  N_("read patterns from file (may be used more than once)"),
1322  N_("=path") },
1323  { "file-offsets", '\0', POPT_BIT_SET, &grepFlags, GREP_FLAGS_FOFFSETS,
1324  N_("output file offsets, not text"), NULL },
/* -H, -h, -l and -L all overwrite the single "filenames" setting with a different FN_* value. */
1325  { "with-filename", 'H', POPT_ARG_VAL, &filenames, FN_FORCE,
1326  N_("force the prefixing filename on output"), NULL },
1327  { "no-filename", 'h', POPT_ARG_VAL, &filenames, FN_NONE,
1328  N_("suppress the prefixing filename on output"), NULL },
1329  { "ignore-case", 'i', POPT_BIT_SET, &grepFlags, GREP_FLAGS_CASELESS,
1330  N_("ignore case distinctions"), NULL },
1331  { "files-with-matches", 'l', POPT_ARG_VAL, &filenames, FN_ONLY,
1332  N_("print only FILE names containing matches"), NULL },
1333  { "files-without-match", 'L', POPT_ARG_VAL, &filenames, FN_NOMATCH_ONLY,
1334  N_("print only FILE names not containing matches"), NULL },
1335  { "label", '\0', POPT_ARG_STRING, &stdin_name, 0,
1336  N_("set name for standard input"), N_("=name") },
/* --line-offsets sets the line-number flag together with the offsets flag. */
1337  { "line-offsets", '\0', POPT_BIT_SET, &grepFlags, (GREP_FLAGS_LOFFSETS|GREP_FLAGS_LNUMBER),
1338  N_("output line numbers and offsets, not text"), NULL },
1339  /* XXX TODO: --locale jiggery-pokery should be done env LC_ALL=C rpmgrep */
1340  { "locale", '\0', POPT_ARG_STRING, &locale, 0,
1341  N_("use the named locale"), N_("=locale") },
1342  { "multiline", 'M', POPT_BIT_SET, &grepFlags, GREP_FLAGS_MULTILINE,
1343  N_("run in multiline mode"), NULL },
1344  { "newline", 'N', POPT_ARG_STRING, &newline, 0,
1345  N_("set newline type (CR|LF|CRLF|ANYCRLF|ANY)"), N_("=type") },
1346  { "line-number", 'n', POPT_BIT_SET, &grepFlags, GREP_FLAGS_LNUMBER,
1347  N_("print line number with output lines"), NULL },
1348  { "only-matching", 'o', POPT_BIT_SET, &grepFlags, GREP_FLAGS_ONLY_MATCHING,
1349  N_("show only the part of the line that matched"), NULL },
1350 /* XXX HACK: there is a longName option conflict with --quiet */
1351  { "quiet", 'q', POPT_BIT_SET, &grepFlags, GREP_FLAGS_QUIET,
1352  N_("suppress output, just set return code"), NULL },
/* -r is a shorthand that stores dee_RECURSE directly, bypassing the callback used by -d. */
1353  { "recursive", 'r', POPT_ARG_VAL, &dee_action, dee_RECURSE,
1354  N_("recursively scan sub-directories"), NULL },
1355  { "exclude", '\0', POPT_ARG_ARGV, &exclude_patterns, 0,
1356  N_("exclude matching files when recursing (may be used more than once)"),
1357  N_("=pattern") },
1358  { "include", '\0', POPT_ARG_ARGV, &include_patterns, 0,
1359  N_("include matching files when recursing (may be used more than once)"),
1360  N_("=pattern") },
/* --no-messages and the hidden --silent set the same flag bit. */
1361  { "no-messages", 's', POPT_BIT_SET, &grepFlags, GREP_FLAGS_SILENT,
1362  N_("suppress error messages"), NULL },
1363  { "silent", '\0', POPT_BIT_SET|POPT_ARGFLAG_DOC_HIDDEN, &grepFlags, GREP_FLAGS_SILENT,
1364  N_("suppress error messages"), NULL },
1365  { "utf-8", 'u', POPT_BIT_SET, &grepFlags, GREP_FLAGS_UTF8,
1366  N_("use UTF-8 mode"), NULL },
1367 /* XXX HACK: there is a longName option conflict with --version */
1368  { "version", 'V', POPT_ARG_NONE, NULL, (int)'V',
1369  N_("print version information and exit"), NULL },
1370 /* XXX HACK: there is a shortName option conflict with -v, --verbose */
1371  { "invert-match", 'v', POPT_BIT_SET, &grepFlags, GREP_FLAGS_INVERT,
1372  N_("select non-matching lines"), NULL },
1373  { "word-regex", 'w', POPT_BIT_SET, &grepFlags, GREP_FLAGS_WORD_MATCH,
1374  N_("force patterns to match only as words") , N_("(p)") },
1375  { "line-regex", 'x', POPT_BIT_SET, &grepFlags, GREP_FLAGS_LINE_MATCH,
1376  N_("force patterns to match only whole lines"), N_("(p)") },
1377 
1378  POPT_AUTOALIAS
1379 
1380  { NULL, '\0', POPT_ARG_INCLUDE_TABLE, rpmioAllPoptTable, 0,
1381  N_("Common options for all rpmio executables:"),
1382  NULL },
1383 
1384  POPT_AUTOHELP
1385 
/* The two entries below include no sub-table (arg is NULL); they only carry free-form help text in their description field. */
1386  { NULL, (char)-1, POPT_ARG_INCLUDE_TABLE, NULL, 0,
1387  N_("\
1388 Usage: rpmgrep [OPTION...] [PATTERN] [FILE1 FILE2 ...]\n\n\
1389  Search for PATTERN in each FILE or standard input.\n\
1390  PATTERN must be present if neither -e nor -f is used.\n\
1391  \"-\" can be used as a file name to mean STDIN.\n\
1392  All files are read as plain files, without any interpretation.\n\n\
1393 Example: rpmgrep -i 'hello.*world' menu.h main.c\
1394 ") , NULL },
1395 
1396  { NULL, (char)-1, POPT_ARG_INCLUDE_TABLE, NULL, 0,
1397  N_("\
1398  When reading patterns from a file instead of using a command line option,\n\
1399  trailing white space is removed and blank lines are ignored.\n\
1400 \n\
1401  With no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n\
1402 ") , NULL },
1403 
1404  POPT_TABLEEND
1405 };
1406 /*@=enumint@*/
1407 
1408 /*************************************************
1409  * Main program.
 *
 * Parses the options, compiles the search patterns (from -e, from -f files,
 * or from the first non-option argument), compiles the include/exclude file
 * name patterns, then greps stdin or each remaining argument. Every error
 * path jumps to errxit, which sets rc to 2 and shares the normal cleanup.
 *
 * NOTE(review): this text comes from a rendered listing in which several
 * numbered lines are absent; the gaps are marked below and must be checked
 * against the real source file.
 *
1410  * @return 0: match found, 1: no match, 2: error.
1411  */
1412 int
1413 main(int argc, char **argv)
1414  /*@globals __assert_program_name, after_context, before_context,
1415  color_string, dee_action,
1416  exclude_patterns, excludeMire, grepFlags,
1417  include_patterns, includeMire, locale, newline,
1418  patterns, pattern_count, pattern_filenames, pattern_list,
1419  stdin_name,
1420  rpmGlobalMacroContext, h_errno, fileSystem, internalState @*/
1421  /*@modifies __assert_program_name, after_context, before_context,
1422  color_string, dee_action,
1423  exclude_patterns, excludeMire, grepFlags,
1424  include_patterns, includeMire, locale, newline,
1425  patterns, pattern_count, pattern_filenames, pattern_list,
1426  stdin_name,
1427  rpmGlobalMacroContext, h_errno, fileSystem, internalState @*/
1428 {
1429  poptContext optCon = rpmioInit(argc, argv, optionsTable);
1430  ARGV_t av = NULL;
1431  int ac = 0;
1432  int i = 0; /* assume av[0] is 1st argument. */
1433  int rc = 1; /* assume not found. */
1434  int j;
1435  int xx;
1436 
/* Start timing the whole run; the matching rpmswExit() is at the exit label. */
1437  xx = rpmswEnter(&grep_totalops, -1);
1438 
1439 /*@-observertrans -readonlytrans@*/
1440  __progname = "pcregrep"; /* XXX HACK in expected name. */
1441 /*@=observertrans =readonlytrans@*/
1442 
1443 
/* Default display name for stdin when --label was not given. */
1444  if (stdin_name == NULL)
1445  stdin_name = xstrdup("(standard input)");
1446 
/* Remaining non-option arguments: optional PATTERN followed by files. */
1447  av = poptGetArgs(optCon);
1448  ac = argvCount(av);
1449 
1450  /* If -C was used, its value is used as a default for -A and -B. */
1451  if (both_context > 0) {
/* NOTE(review): listing lines 1452-1453 are absent; presumably they copy both_context into after_context/before_context -- confirm. */
1454  }
1455 
1456  /*
1457  * Only one of --only-matching, --file-offsets, or --line-offsets is
1458  * permitted. However, the latter two imply the --only-matching option.
1459  */
1460  if (((GF_ISSET(FOFFSETS) || GF_ISSET(LOFFSETS)) && GF_ISSET(ONLY_MATCHING))
1461  || (GF_ISSET(FOFFSETS) && GF_ISSET(LOFFSETS)))
1462  {
1463  fprintf(stderr,
1464 _("%s: Cannot mix --only-matching, --file-offsets and/or --line-offsets\n"),
1465  __progname);
1466  poptPrintUsage(optCon, stderr, 0);
1467  goto errxit;
1468  }
1469 
1470  if (GF_ISSET(FOFFSETS) || GF_ISSET(LOFFSETS))
/* NOTE(review): listing line 1471 (the body of this if) is absent; per the comment above it presumably turns on the only-matching flag -- confirm. */
1472 
1473  /* Compile locale-specific PCRE tables. */
1474  if ((xx = mireSetLocale(NULL, locale)) != 0)
1475  goto errxit;
1476 
1477  /* Initialize global pattern options. */
1478  /* Interpret the newline type; the default settings are Unix-like. */
1479 /*@-moduncon@*/ /* LCL: something fishy. */
1480  xx = mireSetGOptions(newline, GF_ISSET(CASELESS), GF_ISSET(MULTILINE),
1481  GF_ISSET(UTF8));
1482 /*@=moduncon@*/
1483  if (xx != 0) {
1484  fprintf(stderr, _("%s: Invalid newline specifier \"%s\"\n"),
1485  __progname, (newline != NULL ? newline : "lf"));
1486  goto errxit;
1487  }
1488 
1489  /* Get memory to store the pattern and hints lists. */
1490  /* XXX FIXME: rpmmireNew needs to be used here. */
/* NOTE(review): listing line 1491 is absent; presumably it allocates pattern_list -- confirm. */
1492 
1493  /*
1494  * If no patterns were provided by -e, and there is no file provided by -f,
1495  * the first argument is the one and only pattern, and it must exist.
1496  */
1497  { int npatterns = argvCount(patterns);
1498 
1499  /*
1500  * If no patterns were provided by -e, and no file was provided by -f,
1501  * the first argument is the one and only pattern, and it must exist.
1502  */
1503  if (npatterns == 0 && pattern_filenames == NULL) {
1504  if (av == NULL|| i >= ac) {
1505  poptPrintUsage(optCon, stderr, 0);
1506  goto errxit;
1507  }
/* Move av[i] into the patterns list and step past it, so the file loop below starts at the next argument. */
1508  xx = poptSaveString(&patterns, POPT_ARG_ARGV, av[i]);
1509  i++;
1510  }
1511 
1512  /*
1513  * Compile the patterns that were provided on the command line, either
1514  * by multiple uses of -e or as a single unkeyed pattern.
1515  */
1516  npatterns = argvCount(patterns);
1517  if (patterns != NULL)
1518  for (j = 0; j < npatterns; j++) {
/* The index passed for error messages is 0 for a lone pattern, otherwise the 1-based position. */
1519  if (!compile_pattern(patterns[j], NULL,
1520  (j == 0 && npatterns == 1)? 0 : j + 1))
1521  goto errxit;
1522  }
1523  }
1524 
1525  /* Compile the regular expressions that are provided from file(s). */
/* NOTE(review): listing line 1526 (the condition guarding this goto) is absent; presumably a mireLoadPatternFiles() call -- confirm. */
1527  goto errxit;
1528 
1529  /* Study the regular expressions, as we will be running them many times */
1530 /*@-onlytrans@*/
/* NOTE(review): listing line 1531 (the condition guarding this goto) is absent; presumably a mireStudy() call -- confirm. */
1532  goto errxit;
1533 /*@=onlytrans@*/
1534 
1535  /* If there are include or exclude patterns, compile them. */
1536 /*@-compmempass@*/
/* NOTE(review): listing line 1537 (start of the call that fills excludeMire/nexcludes) is absent -- confirm. */
1538  &excludeMire, &nexcludes))
1539  {
/* Report the error recorded in the last entry of the exclude array. */
1540 /*@-nullptrarith@*/
1541  miRE mire = excludeMire + (nexcludes - 1);
1542 /*@=nullptrarith@*/
1543  fprintf(stderr, _("%s: Error in 'exclude' regex at offset %d: %s\n"),
1544  __progname, mire->erroff, mire->errmsg);
1545  goto errxit;
1546  }
1547 /*@=compmempass@*/
1548 /*@-compmempass@*/
/* NOTE(review): listing line 1549 (start of the call that fills includeMire/nincludes) is absent -- confirm. */
1550  &includeMire, &nincludes))
1551  {
/* Report the error recorded in the last entry of the include array. */
1552 /*@-nullptrarith@*/
1553  miRE mire = includeMire + (nincludes - 1);
1554 /*@=nullptrarith@*/
1555  fprintf(stderr, _("%s: Error in 'include' regex at offset %d: %s\n"),
1556  __progname, mire->erroff, mire->errmsg);
1557  goto errxit;
1558  }
1559 /*@=compmempass@*/
1560 
1561  /* If there are no further arguments, do the business on stdin and exit. */
1562  if (i >= ac) {
1563  rc = grep_or_recurse("-", 0, 1);
1564  } else
1565 
1566  /*
1567  * Otherwise, work through the remaining arguments as files or directories.
1568  * Pass in the fact that there is only one argument at top level - this
1569  * suppresses the file name if the argument is not a directory and
1570  * filenames are not otherwise forced.
1571  */
1572  { BOOL only_one_at_top = (i == ac -1); /* Catch initial value of i */
1573 
1574  if (av != NULL)
1575  for (; i < ac; i++) {
1576  int frc = grep_or_recurse(av[i], dee_action == dee_RECURSE,
1577  only_one_at_top);
/* Merge results: any value above 1 (error) replaces rc; a match (0) upgrades "not found" (1) to "found" (0). */
1578  if (frc > 1) rc = frc;
1579  else if (frc == 0 && rc == 1) rc = 0;
1580  }
1581  }
1582 
/* Common cleanup, also reached from errxit with rc already set to 2. */
1583 exit:
1584 /*@-statictrans@*/
/* NOTE(review): listing lines 1585-1590, 1592, 1595 and 1598 are absent; presumably they release the compiled patterns and remaining option strings -- confirm. */
1591 
1593 
1594 /*@-observertrans@*/
1596  locale = _free(locale);
1597  newline = _free(newline);
1599 /*@=observertrans@*/
1600 /*@=statictrans@*/
1601 
/* Stop the overall stopwatch and print the counters only when statistics were requested. */
1602  xx = rpmswExit(&grep_totalops, 0);
1603  if (_rpmsw_stats) {
1604  rpmswPrint(" total:", &grep_totalops, NULL);
1605  rpmswPrint(" read:", &grep_readops, NULL);
1606  }
1607 
1608  optCon = rpmioFini(optCon);
1609 
1610  return rc;
1611 
/* Error exit: force the "error" status and reuse the cleanup above. */
1612 errxit:
1613  rc = 2;
1614  goto exit;
1615 }
static const char * suffix[]
Definition: rpmgrep.c:188
static int chkSuffix(const char *fn, const char *suffix)
Check file name for a suffix.
Definition: rpmgrep.c:909
poptContext rpmioInit(int argc, char *const argv[], struct poptOption *optionsTable)
Definition: poptIO.c:752
static int before_context
Definition: rpmgrep.c:110
int mireSetEOptions(miRE mire, int *offsets, int noffsets)
Initialize pattern execute options (PCRE only).
Definition: mire.c:156
rpmtime_t rpmswExit(rpmop op, ssize_t rc)
Exit timed operation.
Definition: rpmsw.c:264
int mireApply(miRE mire, int nmire, const char *s, size_t slen, int rc)
Apply array of patterns to a string.
Definition: mire.c:553
static enum grepFlags_e grepFlags
Definition: rpmgrep.c:162
static ARGV_t pattern_filenames
Definition: rpmgrep.c:79
int mireLoadPatterns(rpmMireMode mode, int tag, const char **patterns, const unsigned char *table, miRE *mirep, int *nmirep)
Load patterns from string array.
Definition: mire.c:531
char * getenv(const char *name)
void * mireFreeAll(miRE mire, int nmire)
Destroy compiled patterns.
Definition: mire.c:96
char * xstrdup(const char *str)
Definition: rpmmalloc.c:322
const unsigned char * _mirePCREtables
Definition: mire.c:21
FD_t Fopen(const char *path, const char *_fmode)
fopen(3) clone.
Definition: rpmio.c:2833
DEE_e
Actions for the -D option.
Definition: rpmgrep.c:120
#define __progname
Definition: system.h:428
int mireRegcomp(miRE mire, const char *pattern)
Compile pattern match.
Definition: mire.c:334
static ARGV_t patterns
Definition: rpmgrep.c:87
int mireSetCOptions(miRE mire, rpmMireMode mode, int tag, int options, const unsigned char *table)
Initialize pattern compile options.
Definition: mire.c:121
rpmtime_t rpmswAdd(rpmop to, rpmop from)
Sum statistic counters.
Definition: rpmsw.c:280
#define VERSION
Definition: config.h:1270
int Stat(const char *path, struct stat *st)
stat(2) clone.
Definition: rpmrpc.c:1361
dee_e
Actions for the -d option.
Definition: rpmgrep.c:115
int errno
int main(int argc, char **argv)
Definition: rpmgrep.c:1413
FD_t fdDup(int fdno)
Definition: rpmio.c:264
static const unsigned utf8_table3[]
UTF-8 tables - used only when the newline setting is "any".
Definition: rpmgrep.c:194
FN_e
Values for the "filenames" variable, which specifies options for file name output.
Definition: rpmgrep.c:132
void rpmswPrint(const char *name, rpmop op, FILE *fp)
Print operation statistics.
Definition: rpmsw.c:304
static rpmop fdstat_op(FD_t fd, fdOpX opx)
static int pcregrep(FD_t fd, const char *printname)
Definition: rpmgrep.c:509
static enum DEE_e DEE_action
Definition: rpmgrep.c:122
static const char * color_string
Definition: rpmgrep.c:77
#define MBUFTHIRD
Definition: rpmgrep.c:60
static int nexcludes
Definition: rpmgrep.c:98
#define fdGetFILE(_fd)
Definition: rpmio.c:157
const char * Fstrerror(FD_t fd)
strerror(3) clone.
Definition: rpmio.c:2401
void * xcalloc(size_t nmemb, size_t size)
Definition: rpmmalloc.c:301
int count
Definition: rpmsw.h:35
static int pattern_count
Definition: rpmgrep.c:91
#define TRUE
Definition: rpmgrep.c:53
#define N_(Text)
Definition: system.h:490
Definition: mire.h:37
int argvCount(const ARGV_t argv)
Return number of elements in argv array.
Definition: argv.c:71
Definition: mire.h:37
static int xisspace(int c)
Definition: rpmiotypes.h:446
ARGV_t argvFree(ARGV_t argv)
Destroy an argv array.
Definition: argv.c:44
struct miRE_s * miRE
Definition: mire.h:60
#define GF_ISSET(_FLAG)
Definition: rpmgrep.c:137
static int nincludes
Definition: rpmgrep.c:105
static ARGV_t include_patterns
Definition: rpmgrep.c:101
static const char utf8_table4[]
Definition: rpmgrep.c:200
static enum FN_e filenames
Definition: rpmgrep.c:134
static miRE excludeMire
Definition: rpmgrep.c:96
static int both_context
Definition: rpmgrep.c:112
static void fwrite_check(const void *ptr, size_t size, size_t nmemb, FILE *stream)
Definition: rpmgrep.c:63
The FD_t File Handle data structure.
int mireRegexec(miRE mire, const char *val, size_t vallen)
Execute pattern match.
Definition: mire.c:398
int rpmswEnter(rpmop op, ssize_t rc)
Enter timed operation.
Definition: rpmsw.c:248
size_t Fread(void *buf, size_t size, size_t nmemb, FD_t fd)
fread(3) clone.
Definition: rpmio.c:2412
#define dirent
Definition: system.h:230
enum rpmMireMode_e rpmMireMode
Tag value pattern match mode.
static rpmMireMode grepMode
Definition: rpmgrep.c:168
int Fclose(FD_t fd)
fclose(3) clone.
Definition: rpmio.c:2534
Cumulative statistics for an operation.
Definition: rpmsw.h:33
int mireStudy(miRE mire, int nmires)
Study PCRE patterns (if any).
Definition: mire.c:576
static ARGV_t exclude_patterns
Definition: rpmgrep.c:94
grepFlags_e
Definition: rpmgrep.c:139
unsigned BOOL
Definition: rpmgrep.c:51
static const char * previous_line(const char *p, const char *startptr)
Definition: rpmgrep.c:372
int Ferror(FD_t fd)
ferror(3) clone.
Definition: rpmio.c:2944
static miRE includeMire
Definition: rpmgrep.c:103
mireEL_t _mireEL
Definition: mire.c:24
int mireSetLocale(miRE mire, const char *locale)
Compile locale-specific PCRE tables.
Definition: mire.c:278
static struct rpmop_s grep_totalops
Definition: rpmgrep.c:172
static int snprintf(char *buf, int nb, const char *fmt,...)
Definition: rpmps.c:220
#define _GFB(n)
Definition: rpmgrep.c:136
static const char * prefix[]
Tables for prefixing and suffixing patterns, according to the -w, -x, and -F options.
Definition: rpmgrep.c:183
static const char * stdin_name
Definition: rpmgrep.c:81
static int mireLoadPatternFiles(ARGV_t files)
Load patterns from files.
Definition: rpmgrep.c:1154
Definition: mire.h:37
static void * _free(const void *p)
Wrapper to free(3), hides const compilation noise, permit NULL, return NULL.
Definition: rpmiotypes.h:647
#define MAX_PATTERN_COUNT
Definition: rpmgrep.c:55
static enum dee_e dee_action
Definition: rpmgrep.c:117
static void do_after_lines(int lastmatchnumber, const char *lastmatchrestart, const char *endptr, const char *printname)
Definition: rpmgrep.c:472
int _rpmsw_stats
Definition: rpmsw.c:20
static const char * end_of_line(const char *p, const char *endptr, size_t *lenptr)
Definition: rpmgrep.c:222
static struct poptOption optionsTable[]
Definition: rpmgrep.c:1293
#define _(Text)
Definition: system.h:30
static struct rpmop_s grep_readops
Definition: rpmgrep.c:174
static int grep_or_recurse(const char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
Definition: rpmgrep.c:931
ARGstr_t * ARGV_t
Definition: argv.h:9
int mireSetGOptions(const char *newline, int caseless, int multiline, int utf8)
Initialize pattern global options (PCRE only).
Definition: mire.c:180
struct poptOption rpmioAllPoptTable[]
Definition: poptIO.c:551
static const char * locale
Definition: rpmgrep.c:84
poptContext rpmioFini(poptContext optCon)
Definition: poptIO.c:719
static miRE pattern_list
Definition: rpmgrep.c:89
static const char * newline
Definition: rpmgrep.c:74
static BOOL compile_single_pattern(const char *pattern, const char *filename, int count)
Definition: rpmgrep.c:1072
static int after_context
Definition: rpmgrep.c:108
#define FALSE
Definition: rpmgrep.c:52
static BOOL compile_pattern(const char *pattern, const char *filename, int count)
Definition: rpmgrep.c:1128
static int error_count
Definition: rpmgrep.c:125
static void grepArgCallback(poptContext con, enum poptCallbackReason reason, const struct poptOption *opt, const char *arg, void *data)
Definition: rpmgrep.c:1215
Definition: mire.h:37