#include #include #include #include #include #include "buf.h" #include "sb.h" #include "ll.h" static char* line(void); static char* wordset(char* txt); static void wsclean(char* txt); static char* norm(void); static FILE* getfile(int argc, char** argv){ if (argc >= 2) return fopen(argv[1], "r"); else return stdin; } static char* chrmult(char c, size_t ct){ char* str = malloc(sizeof(char)*(ct+1)); str[ct] = 0; memset(str, c, ct); return str; } // file read & buffer fill helpers static FILE* in; static buf* inbuf; static int addchr(void){ int c = fgetc(in); if (c != EOF) inschrbuf(inbuf, c); return c; } static size_t fillbuf(size_t len){ // returns real len int c; size_t set; for (set = buflen(inbuf); set < len && (c = addchr()) != EOF;set++); return set; } static size_t chrfill(char chr){ size_t len = buflen(inbuf); char* str = peekstrbuf(inbuf, 0, len); char* line = memchr(str,chr,len); free(str); if (line) return line-str+1; for (int c = 0; c != chr && (c = addchr()) != EOF; len++); return len; } #define min(a,b) a < b ? a : b size_t chrnfill(char chr, size_t sz){//fills to first of chr or sz size_t len = buflen(inbuf); size_t lim = min(len,sz); char* str = peekstrbuf(inbuf, 0, lim); char* line = memchr(str, chr, lim); free(str); if (line) return line-str+1; for (int c = 0; c != chr && (c = addchr()) != EOF&& len= width) return strdup(txt); int max = width/2 + len/2; // excluding terminator int min = max-len+1; // first index of *txt char* str = malloc(sizeof(char)*(max + 2)); memset(str, ' ', min); memcpy(str+min, txt, len); str[max] = '\n'; str[max+1] = 0; return str; } static char* setwidth(char* txt){ if (isdigit(*txt)) sscanf(txt, "%d", &width); return NULL; } static char* fillline(char* txt){ char* str = malloc(sizeof(char)*(width+2)); str[width] = '\n'; str[width+1] = 0; memset(str, txt[0], width); return str; } static char* token(char* txt, char c) { char* end = strchr(txt, c); if (end) { *end = 0; return end+1; } return NULL; } static char* leader(char* txt){ char *start, *repeat, *end, *fin; start = txt; if ( !(repeat = token(start, '|') ) ) { repeat = " . "; end = ""; } else if ( !(end = token(repeat, '|') ) ) { end = repeat; repeat = " . "; } int startlen = strlen(start); int endlen = strlen(end); int rptln = strlen(repeat); if (startlen + endlen > width) { fin = malloc(sizeof(char)*(startlen+endlen+2)); memcpy(fin, start, startlen); fin[startlen] = ' '; memcpy(fin+startlen+1, end, endlen+1); return fin; } fin = malloc(sizeof(char)*(width+2)); fin[width] = '\n'; fin[width+1] = 0; strcpy(fin, start); size_t max = width-endlen+1; strcpy(fin+max, end); for (int i=startlen; inext; node->next = tmp->next; free(tmp->str); free(tmp); if (tmp == node) return NULL; return node; } static void multbreak(sb* buffer, size_t start, size_t len, llnode** tail, size_t* ct){ for (size_t i = *ct; i > 0; i--){ char* str = (*tail)->next->str + start; // tail->next is used so it can be gracefully freed if (strlen(str) < len){ // if node is smaller than len, insstr(buffer, str); // put the whole thing in the buffer // and remove the node, switching to next *tail = freenext(*tail); (*ct)--; } else { char tmp = str[len]; str[len] = 0; // truncate str at max length insstr(buffer, str); // insert that insstr(buffer, "\n"); str[len] = tmp; // restore string *tail = (*tail)->next; // iter } } } static char* foldlines(llnode* tail, size_t ct){ sb* buf = newsb(100); int oldbrk = 0, brk = -1; for (size_t i = 0; tail; i++){ // check every column until all rows // are removed. bool valid = true; // if stays true, this is a space bool done = true; // if stays true, all strings are complete llnode* orig = tail; do { // check every string if (!tail->end && tail->str[i] == 0) tail->end = true; // ignore strings that have already finished else if (!tail->end){ done = false; if (tail->str[i] != ' ') valid = false; // can't break here because tail needs to return to orig } tail = tail->next; } while (tail != orig); if (valid) brk = i; if (done || (brk != -1 && !valid && i-oldbrk >= width)){ // either all strings are done // or it has hit a non-space column and accum enough space multbreak(buf, oldbrk, brk-oldbrk+1, &tail, &ct); oldbrk = brk+1; // tells offset from previous cut brk = -1; } } return decompose(buf); } static llnode* accumlines(size_t* ct){ // returns tail of linked list llnode *head, *tail; head = tail = appendll(NULL, NULL); (*ct) = 0; while (!endgroup){ char* ln = line(); if (!ln[0]) { // ln == "" free(ln); break; } char* next; //greater than nl while ( (next = strchr(ln, '\n')) ) { next++; char sub = *next; *next = 0; tail = appendll(tail, ln); (*ct)++; *next = sub; ln = next; } } tail->next = head->next; // loop linked list free(head); endgroup = false; // prevents false positive on next run return tail; } static char* lineset(char* txt){ size_t ct; llnode* tail = accumlines(&ct); return foldlines(tail, ct); } static char* emptalloc(void){ // need a null string so it can be freed later char* str = malloc(sizeof(char)); str[0] = 0; return str; } static char* fingroup(char* txt){ endgroup = true; return emptalloc(); } static char* vert(char* txt){ int ln; if (isdigit(*txt)) sscanf(txt, "%d", &ln); else ln = 1; return chrmult('\n', ln); } static char* nbrkspc(char* txt){ if (txt[0] == '\n') txt[0] = 0; if (txt[1] == '\n') txt[1] = 0; if (txt[0] != 0 && txt[1] != 0) nbsp = txt[1]; else if (txt[0] != 0) nbsp = txt[0]; else nbsp = 0; return NULL; } static char* join = NULL; static char* merge(char* txt){ // merges until an endgroup join = txt; for (char* pos = txt; *pos != '\0'; pos++) { *txt = *pos; if (*pos != '\\') txt++; } *(txt-1) = 0; join = strdup(join); return NULL; } static char* nomrg(char* txt){ join = NULL; return NULL; } int indlevel = 0; static char* indent(char* txt){ int orig = indlevel; if (isdigit(*txt)) sscanf(txt, "%d", &indlevel); else indlevel = 0; // REPEATED width += orig-indlevel; return NULL; } static char* cmds[] = // MUST be sorted alphabetically {"CT", "EG", "FIL", "IND", "LD", "LS", "MRG", "NBSP", "NMRG", "V", "W"}; static char* (*call[])(char* txt) = {center, fingroup, fillline, indent, leader, lineset, merge, nbrkspc, nomrg, vert, setwidth}; static char* cmd(void){ char* dat = norm(); int low = 0; int high = sizeof(cmds)/sizeof(*cmds) - 1; //max char* proc = NULL; bool found = false; while (high >= low){ int mid = ((unsigned int)low + (unsigned int)high) >> 1; char* mval = cmds[mid]; int cmp = strncmp(dat, mval, strlen(mval)); if (cmp < 0) // dat < mval high = mid - 1; else if (cmp > 0) low = mid + 1; else{ int len = strlen(mval); found = true; consumews(dat+len); proc = call[mid](dat+len); break; } } if (found) { free(dat); return proc; } else { fprintf(stderr, "INVALID COMMAND .%s\n",dat); exit(1); } } // normal typesetting void wsclean(char* txt){ int nl = 0; // count of newlines size_t i; for (i = strlen(txt); i > 0; i--) { // start at end [i-1] and dec if (txt[i-1] == '\n') nl++; // count up newlines else if (txt[i-1] != ' ') break; // and break on unpadded text } memset(txt+i, '\n', nl); // add #nl newlines at [i, i+nl) txt[i+nl] = 0; // finish string } #define nbspchr (char) 255 static void nbspclean(char* txt){ for (; *txt != 0; txt++){ if (*txt == nbspchr) *txt = ' '; } } static void wsadd(char** txt, int lnct) { int len = strlen(*txt); *txt = realloc(*txt, sizeof(char)*(len + lnct*indlevel + 1)); char* nl = *txt; while (*nl) { nl += indlevel; memmove(nl, nl-indlevel, len+1); memset(nl-indlevel, ' ', indlevel); char* last = nl; nl = (char*)memchr(nl, '\n', len)+1; len -= nl-last; } } char* wordset(char* txt){ char* orig = txt; // txt will be manipulated int ws = 0; // zero means do not cut int ln = 0; for (int i=0; txt[i] != 0; i++){ // turn into a nested loop if (txt[i] == ' ' || txt[i] == '\n') ws = i; // nl's (multiline input) and spaces are proper linebreaks if (txt[i] == '\n' || (ws && i >= width && ws != i) ){ txt += ws; txt[0] = '\n'; ws = 0; i = 0; ln++; } } wsadd(&orig, ln); wsclean(orig); // remove trailing whitespace nbspclean(orig); return orig; } static char* norm(void){ int len = chrfill('\n'); // len = strlen(out) = sizeof(out)-1 char* out = popstrbuf(inbuf,len); // out[len-1] = '\n' if (join) { out = realloc(out, len+strlen(join)-1); strcpy(out+len-1, /* \n loc */ join); } wsclean(out); return out; } // a parser to choose when to typeset and when to run a command static void nbspsub(char* txt){ for (; (*txt) != 0; txt++){ if (*txt == nbsp) *txt = nbspchr; } } char* line(void){ size_t sz; if ( (sz = fillbuf(2)) == 0) return ""; char twobytes[3]; char* peek = peekstrbuf(inbuf, 0, 2); // .., .\n, or ^.? memcpy(twobytes, peek, 3); free(peek); if (sz == 1 || twobytes[1] == '\n') return norm(); if (twobytes[0] == '.') popchrbuf(inbuf); char* out; if (twobytes[0] == '.' && twobytes[1] != '.'){ char* data = cmd(); if (data) return data; out = line(); } else { out = norm(); } nbspsub(out); return out; } // orchestration int main(int argc, char** argv){ in = getfile(argc, argv); inbuf = newbuf(256); if (in == NULL){ perror(argv[1]); return 1; } char* out; while ( (out = line())[0] != '\0'){ out = wordset(out); printf("%s",out); free(out); } fclose(in); return 0; }