123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603 |
- /*++
- Copyright (c) 2013 Minoca Corp. All Rights Reserved
- Module Name:
- uniq.c
- Abstract:
- This module implements the uniq utility, which removes adjacent duplicate
- lines.
- Author:
- Evan Green 9-Sep-2013
- Environment:
- POSIX
- --*/
- //
- // ------------------------------------------------------------------- Includes
- //
- #include <minoca/lib/types.h>
- #include <assert.h>
- #include <ctype.h>
- #include <errno.h>
- #include <getopt.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include "swlib.h"
- //
- // ---------------------------------------------------------------- Definitions
- //
//
// Version reported by --version.
//

#define UNIQ_VERSION_MAJOR 1
#define UNIQ_VERSION_MINOR 0

//
// Help text printed for --help. The final string must not be followed by a
// line continuation, otherwise the next preprocessor directive gets absorbed
// into this macro.
//

#define UNIQ_USAGE                                                             \
    "usage: uniq [-cdu] [-f fields] [-s char] [input_file [output_file]]\n"    \
    "The uniq utility reads an input file, comparing adjacent lines, and \n"   \
    "writes one unique copy of each input line to the output. The input and \n"\
    "output file operands are optional. If an input is not supplied or if \n"  \
    "it is -, then standard in will be used. Options are:\n"                   \
    " -c, --count -- Precede each output line with the number of "             \
    "occurrences.\n"                                                           \
    " -d, --repeated -- Suppress the writing of lines that are not \n"         \
    " repeated in the input.\n"                                                \
    " -D, --all-repeated=type -- Print repeated lines. Type can be none, \n"   \
    " prepend to print a delimiter before every repeated group, or \n"         \
    " separate to print a newline before every repeated group except \n"       \
    " the first.\n"                                                            \
    " -f, --skip-fields N -- Avoid comparing the first N fields. Fields are\n" \
    " separated by blanks.\n"                                                  \
    " -i, --ignore-case -- Ignore case when comparing.\n"                      \
    " -s, --skip-chars N -- Avoid comparing the first N characters.\n"         \
    " -u, --unique -- Suppress the writing of lines that are repeated in \n"   \
    " the input.\n"                                                            \
    " -w, --check-chars=N -- Only check the first N characters.\n"             \
    " -z, --zero-terminated -- Separate lines with zero bytes rather than "    \
    "newlines.\n"                                                              \
    " --help -- Show this help text and exit.\n"                               \
    " --version -- Show the application version and exit.\n"

//
// Short option specification handed to getopt_long. A trailing colon marks an
// option that requires an argument.
//

#define UNIQ_OPTIONS_STRING "cdDf:is:uw:zhV"
//
// Bit flags collected in the options mask while the command line is parsed.
//

//
// Prefix every line written with its occurrence count.
//

#define UNIQ_OPTION_PRINT_COUNT 0x01

//
// Do not write lines that appear only once.
//

#define UNIQ_OPTION_SUPPRESS_UNIQUE 0x02

//
// Compare lines without regard to case.
//

#define UNIQ_OPTION_IGNORE_CASE 0x04

//
// Do not write lines that are repeated in the input.
//

#define UNIQ_OPTION_SUPPRESS_REPEATED 0x08

//
// Write the second and subsequent lines of a repeated group as well.
//

#define UNIQ_OPTION_ALL_REPEATED 0x10
- //
- // ------------------------------------------------------ Data Type Definitions
- //
//
// Selects how groups of repeated lines are delimited when all repeated lines
// are being printed: not at all, before every group, or before every group
// except the first.
//

typedef enum _UNIQ_GROUPING {
    UniqGroupNone = 0,
    UniqGroupPrepend = 1,
    UniqGroupSeparate = 2
} UNIQ_GROUPING, *PUNIQ_GROUPING;
- //
- // ----------------------------------------------- Internal Function Prototypes
- //
- PSTR
- UniqSkip (
- PSTR Input,
- ULONG FieldCount,
- ULONG CharacterCount
- );
- //
- // -------------------------------------------------------------------- Globals
- //
//
// Long option table for getopt_long. Each entry maps a long name onto the
// short option character handled by the option switch; the all-zero entry
// terminates the table.
//

struct option UniqLongOptions[] = {
    {"count",           no_argument,       NULL, 'c'},
    {"repeated",        no_argument,       NULL, 'd'},
    {"all-repeated",    optional_argument, NULL, 'D'},
    {"skip-fields",     required_argument, NULL, 'f'},
    {"ignore-case",     no_argument,       NULL, 'i'},
    {"skip-chars",      required_argument, NULL, 's'},
    {"unique",          no_argument,       NULL, 'u'},
    {"check-chars",     required_argument, NULL, 'w'},
    {"zero-terminated", no_argument,       NULL, 'z'},
    {"help",            no_argument,       NULL, 'h'},
    {"version",         no_argument,       NULL, 'V'},
    {NULL,              0,                 NULL, 0},
};
- //
- // ------------------------------------------------------------------ Functions
- //
- INT
- UniqMain (
- INT ArgumentCount,
- CHAR **Arguments
- )
- /*++
- Routine Description:
- This routine is the main entry point for the cp utility.
- Arguments:
- ArgumentCount - Supplies the number of command line arguments the program
- was invoked with.
- Arguments - Supplies a tokenized array of command line arguments.
- Return Value:
- Returns an integer exit code. 0 for success, nonzero otherwise.
- --*/
- {
- PSTR AfterScan;
- PSTR Argument;
- ULONG ArgumentIndex;
- size_t CharacterCount;
- INT Comparison;
- BOOL FirstGroup;
- FILE *Input;
- PSTR InputName;
- PSTR Line;
- PSTR LineStart;
- INT Option;
- ULONG Options;
- FILE *Output;
- PSTR OutputName;
- PSTR PreviousLine;
- PSTR PreviousLineStart;
- BOOL PrintLine;
- ULONG RepeatCount;
- UNIQ_GROUPING RepeatGroup;
- CHAR Separator;
- LONG SkipCharacters;
- LONG SkipFields;
- int Status;
- CharacterCount = -1;
- FirstGroup = TRUE;
- Input = NULL;
- Line = NULL;
- PreviousLine = NULL;
- Options = 0;
- Output = NULL;
- RepeatGroup = UniqGroupNone;
- Separator = '\n';
- SkipCharacters = 0;
- SkipFields = 0;
- //
- // Process the control arguments.
- //
- while (TRUE) {
- Option = getopt_long(ArgumentCount,
- Arguments,
- UNIQ_OPTIONS_STRING,
- UniqLongOptions,
- NULL);
- if (Option == -1) {
- break;
- }
- if ((Option == '?') || (Option == ':')) {
- Status = 1;
- goto MainEnd;
- }
- switch (Option) {
- case 'c':
- Options |= UNIQ_OPTION_PRINT_COUNT;
- break;
- case 'd':
- Options |= UNIQ_OPTION_SUPPRESS_UNIQUE;
- break;
- case 'D':
- Options |= UNIQ_OPTION_ALL_REPEATED | UNIQ_OPTION_SUPPRESS_UNIQUE;
- if (optarg != NULL) {
- if (strcmp(optarg, "none") == 0) {
- RepeatGroup = UniqGroupNone;
- } else if (strcmp(optarg, "prepend") == 0) {
- RepeatGroup = UniqGroupPrepend;
- } else if (strcmp(optarg, "separate") == 0) {
- RepeatGroup = UniqGroupSeparate;
- } else {
- SwPrintError(0, optarg, "Unknown grouping type");
- Status = 1;
- goto MainEnd;
- }
- }
- break;
- case 'i':
- Options |= UNIQ_OPTION_IGNORE_CASE;
- break;
- case 'u':
- Options |= UNIQ_OPTION_SUPPRESS_REPEATED;
- break;
- case 'f':
- Argument = optarg;
- assert(Argument != NULL);
- SkipFields = strtol(Argument, &AfterScan, 10);
- if ((SkipFields < 0) || (AfterScan == Argument)) {
- SwPrintError(0, Argument, "Invalid field count");
- return 1;
- }
- break;
- case 's':
- Argument = optarg;
- assert(Argument != NULL);
- SkipCharacters = strtol(Argument, &AfterScan, 10);
- if ((SkipCharacters < 0) || (AfterScan == Argument)) {
- SwPrintError(0, Argument, "Invalid character count");
- return 1;
- }
- break;
- case 'w':
- Argument = optarg;
- CharacterCount = strtoul(Argument, &AfterScan, 10);
- if (AfterScan == Argument) {
- SwPrintError(0, Argument, "Invalid character count");
- return 1;
- }
- break;
- case 'z':
- Separator = '\0';
- break;
- case 'V':
- SwPrintVersion(UNIQ_VERSION_MAJOR, UNIQ_VERSION_MINOR);
- return 1;
- case 'h':
- printf(UNIQ_USAGE);
- return 1;
- default:
- assert(FALSE);
- Status = 1;
- goto MainEnd;
- }
- }
- if (((Options & UNIQ_OPTION_ALL_REPEATED) != 0) &&
- ((Options & UNIQ_OPTION_PRINT_COUNT) != 0)) {
- SwPrintError(0, NULL, "-D and -c together is invalid");
- Status = 1;
- goto MainEnd;
- }
- //
- // Get the optional input and output names.
- //
- ArgumentIndex = optind;
- if (ArgumentIndex > ArgumentCount) {
- ArgumentIndex = ArgumentCount;
- }
- InputName = NULL;
- OutputName = NULL;
- if (ArgumentIndex < ArgumentCount) {
- InputName = Arguments[ArgumentIndex];
- if (strcmp(InputName, "-") == 0) {
- InputName = NULL;
- }
- ArgumentIndex += 1;
- if (ArgumentIndex < ArgumentCount) {
- OutputName = Arguments[ArgumentIndex];
- ArgumentIndex += 1;
- if (ArgumentIndex < ArgumentCount) {
- SwPrintError(0, Arguments[ArgumentIndex], "Too many arguments");
- return 1;
- }
- }
- }
- if (InputName == NULL) {
- Input = stdin;
- } else {
- Input = fopen(InputName, "r");
- if (Input == NULL) {
- Status = errno;
- SwPrintError(Status, InputName, "Unable to open");
- goto MainEnd;
- }
- }
- if (OutputName == NULL) {
- Output = stdout;
- } else {
- Output = fopen(OutputName, "r");
- if (Output == NULL) {
- Status = errno;
- SwPrintError(Status, OutputName, "Unable to open");
- goto MainEnd;
- }
- }
- //
- // Loop processing the files.
- //
- Status = SwReadLine(Input, &PreviousLine);
- if ((Status != 0) || (PreviousLine == NULL)) {
- goto MainEnd;
- }
- RepeatCount = 1;
- while (TRUE) {
- if (feof(Input) != 0) {
- Line = NULL;
- Comparison = 1;
- } else {
- Status = SwReadLine(Input, &Line);
- if (Status != 0) {
- goto MainEnd;
- }
- if (Line == NULL) {
- Comparison = 1;
- } else {
- LineStart = UniqSkip(Line, SkipFields, SkipCharacters);
- PreviousLineStart = UniqSkip(PreviousLine,
- SkipFields,
- SkipCharacters);
- if ((Options & UNIQ_OPTION_IGNORE_CASE) != 0) {
- Comparison = strncasecmp(LineStart,
- PreviousLineStart,
- CharacterCount);
- } else {
- Comparison = strncmp(LineStart,
- PreviousLineStart,
- CharacterCount);
- }
- }
- }
- //
- // Handle the lines being equal (duplicate).
- //
- if (Comparison == 0) {
- //
- // Print if repeated lines are requested.
- //
- if ((Options & UNIQ_OPTION_ALL_REPEATED) != 0) {
- //
- // Separate groups of repeated lines if requested. The only
- // difference bewteen prepend and separate is that separate
- // doesn't print a delimiter before the first group.
- //
- if (RepeatCount == 1) {
- if ((RepeatGroup == UniqGroupPrepend) ||
- ((RepeatGroup == UniqGroupSeparate) &&
- (FirstGroup == FALSE))) {
- putchar(Separator);
- }
- FirstGroup = FALSE;
- }
- //
- // Print the line.
- //
- PrintLine = TRUE;
- RepeatCount += 1;
- //
- // Skip the repeated line normally.
- //
- } else {
- free(Line);
- Line = NULL;
- RepeatCount += 1;
- continue;
- }
- } else {
- //
- // They're not equal, so spit this line out.
- //
- PrintLine = TRUE;
- if (RepeatCount == 1) {
- if ((Options & UNIQ_OPTION_SUPPRESS_UNIQUE) != 0) {
- PrintLine = FALSE;
- }
- } else {
- if ((Options & UNIQ_OPTION_SUPPRESS_REPEATED) != 0) {
- PrintLine = FALSE;
- }
- }
- }
- if (PrintLine != FALSE) {
- if ((Options & UNIQ_OPTION_PRINT_COUNT) != 0) {
- printf("%7d %s%c", RepeatCount, PreviousLine, Separator);
- } else {
- printf("%s%c", PreviousLine, Separator);
- }
- }
- //
- // Move the current line to the previous line.
- //
- free(PreviousLine);
- PreviousLine = Line;
- if (Comparison != 0) {
- RepeatCount = 1;
- }
- if (Line == NULL) {
- break;
- }
- }
- MainEnd:
- if (Line != NULL) {
- free(Line);
- }
- if (PreviousLine != NULL) {
- free(PreviousLine);
- }
- if ((Input != NULL) && (Input != stdin)) {
- fclose(Input);
- }
- if ((Output != NULL) && (Output != stdout)) {
- fclose(Output);
- }
- return Status;
- }
- //
- // --------------------------------------------------------- Internal Functions
- //
- PSTR
- UniqSkip (
- PSTR Input,
- ULONG FieldCount,
- ULONG CharacterCount
- )
- /*++
- Routine Description:
- This routine skips a certain number of fields and/or characters, where a
- field is defined as any number of blank spaces followed by any number of
- non-blank spaces. Fields are skipped before characters.
- Arguments:
- Input - Supplies the input to advance.
- FieldCount - Supplies the number of fields to skip.
- CharacterCount - Supplies the number of characters to skip.
- Return Value:
- Returns a pointer within the string advanced past the specified number of
- fields and/or characters..
- --*/
- {
- ULONG FieldIndex;
- for (FieldIndex = 0; FieldIndex < FieldCount; FieldIndex += 1) {
- if (*Input == '\0') {
- break;
- }
- while (isblank(*Input)) {
- Input += 1;
- }
- while (!isblank(*Input)) {
- Input += 1;
- }
- }
- while ((CharacterCount != 0) && (*Input != '\0')) {
- Input += 1;
- CharacterCount -= 1;
- }
- return Input;
- }
|