Fossil: Documentation

/*
** Copyright (c) 2002 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
**
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
**   drh@hwaci.com
**   http://www.hwaci.com/drh/
**
*******************************************************************************
**
** SYNOPSIS:
**
** Input lines that begin with the "@" character are translated into
** either cgi_printf() statements or string literals and the
** translated code is written on standard output.
**
** The problem this program is attempt to solve is as follows:  When
** writing CGI programs in C, we typically want to output a lot of HTML
** text to standard output.  In pure C code, this involves doing a
** printf() with a big string containing all that text.  But we have
** to insert special codes (ex: \n and \") for many common characters,
** which interferes with the readability of the HTML.
**
** This tool allows us to put raw HTML, without the special codes, in
** the middle of a C program.  This program then translates the text
** into standard C by inserting all necessary backslashes and other
** punctuation.
**
** Enhancement #1:
**
** If the last non-whitespace character prior to the first "@" of a
** @-block is "=" or "," then the @-block is a string literal initializer
** rather than text that is to be output via cgi_printf().  Render it
** as such.
**
** Enhancement #2:
**
** Comments of the form:  "|* @-comment: CC" (where "|" is really "/")
** cause CC to become a comment character for the @-substitution.
** Typical values for CC are "--" (for SQL text) or "#" (for Tcl script)
** or "//" (for C++ code).  Lines of subsequent @-blocks that begin with
** CC are omitted from the output.
**
** Enhancement #3:
**
** If a non-enhancement #1 line ends in backslash, the backslash and the
** newline (\n) are not included in the argument to cgi_printf().  This
** is used to split one long output line across multiple source lines.
*/
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

/*
** Space to hold arguments at the end of the cgi_printf()
*/
#define MX_ARG_SP 10000
static char zArg[MX_ARG_SP];
static int nArg = 0;

/*
** True if we are currently in a cgi_printf()
*/
static int inPrint = 0;

/*
** True if we are currently doing a free string
*/
static int inStr = 0;

/*
** Name of files being processed
*/
static const char *zInFile = "(stdin)";

/*
** Terminate an active cgi_printf() or free string
*/
static void end_block(FILE *out){
  if( inPrint ){
    zArg[nArg] = 0;
    fprintf(out, "%s);\n", zArg);
    nArg = 0;
    inPrint = 0;
  }
}

/*
** Translate the input stream into the output stream
*/
static void trans(FILE *in, FILE *out){
  int i, j, k;          /* Loop counters */
  char c1, c2;          /* Characters used to start a comment */
  int lastWasEq = 0;    /* True if last non-whitespace character was "=" */
  int lastWasComma = 0; /* True if last non-whitespace character was "," */
  int lineNo = 0;       /* Line number */
  char zLine[2000];     /* A single line of input */
  char zOut[4000];      /* The input line translated into appropriate output */

  c1 = c2 = '-';
  while( fgets(zLine, sizeof(zLine), in) ){
    lineNo++;
    for(i=0; zLine[i] && isspace(zLine[i]); i++){}
    if( zLine[i]!='@' ){
      if( inPrint || inStr ) end_block(out);
      fprintf(out,"%s",zLine);
                       /* 0123456789 12345 */
      if( strncmp(zLine, "/* @-comment: ", 14)==0 ){
        c1 = zLine[14];
        c2 = zLine[15];
      }
      i += strlen(&zLine[i]);
      while( i>0 && isspace(zLine[i-1]) ){ i--; }
      lastWasEq    = i>0 && zLine[i-1]=='=';
      lastWasComma = i>0 && zLine[i-1]==',';
    }else if( lastWasEq || lastWasComma){
      /* If the last non-whitespace character before the first @ was
      ** an "="(var init/set) or a ","(const definition in list) then
      ** generate a string literal.  But skip comments
      ** consisting of all text between c1 and c2 (default "--")
      ** and end of line.
      */
      int indent, omitline;
      char *zNewline = "\\n";
      i++;
      if( isspace(zLine[i]) ){ i++; }
      indent = i - 2;
      if( indent<0 ) indent = 0;
      omitline = 0;
      for(j=0; zLine[i] && zLine[i]!='\r' && zLine[i]!='\n'; i++){
        if( zLine[i]==c1 && (c2==' ' || zLine[i+1]==c2) ){
           omitline = 1; break;
        }
        if( zLine[i]=='\\' && (zLine[i+1]==0 || zLine[i+1]=='\r'
                                 || zLine[i+1]=='\n') ){
          zLine[i] = 0;
          zNewline = "";
          /* fprintf(stderr, "%s:%d: omit newline\n", zInFile, lineNo); */
          break;
        }
        if( zLine[i]=='\\' || zLine[i]=='"' ){ zOut[j++] = '\\'; }
        zOut[j++] = zLine[i];
      }
      if( zNewline[0] ) while( j>0 && isspace(zOut[j-1]) ){ j--; }
      zOut[j] = 0;
      if( j<=0 && omitline ){
        fprintf(out,"\n");
      }else{
        fprintf(out,"%*s\"%s%s\"\n",indent, "", zOut, zNewline);
      }
    }else{
      /* Otherwise (if the last non-whitespace was not '=') then generate a
      ** cgi_printf() statement whose format is the text following the '@'.
      ** Substrings of the form "%C(...)" (where C is any sequence of characters
      ** other than \000 and '(') will put "%C" in the format and add the
      ** "(...)" as an argument to the cgi_printf call.  Each '*' character
      ** present in C (max two) causes one more "(...)" sequence to be consumed.
      ** For example, "%*.*d(4)(2)(1)" converts to "%*.*d" with arguments "4",
      ** "2", and "1", which will be used as the field width, precision, and
      ** value, respectively, producing a final formatted result of "  01".
      */
      const char *zNewline = "\\n";
      int indent;
      int nC;
      int nParam;
      char c;
      i++;
      if( isspace(zLine[i]) ){ i++; }
      indent = i;
      for(j=0; zLine[i] && zLine[i]!='\r' && zLine[i]!='\n'; i++){
        if( zLine[i]=='\\' && (!zLine[i+1] || zLine[i+1]=='\r'
                                           || zLine[i+1]=='\n') ){
          zNewline = "";
          break;
        }
        if( zLine[i]=='"' || zLine[i]=='\\' ){ zOut[j++] = '\\'; }
        zOut[j++] = zLine[i];
        if( zLine[i]!='%' || zLine[i+1]=='%' || zLine[i+1]==0 ) continue;
        nParam=1;
        for(nC=1; zLine[i+nC] && zLine[i+nC]!='('; nC++){
          if( zLine[i+nC]=='*' && nParam < 3 ) nParam++;
        }
        if( zLine[i+nC]!='(' || !isalpha(zLine[i+nC-1]) ) continue;
        while( --nC ) zOut[j++] = zLine[++i];
        do{
          zArg[nArg++] = ',';
          k = 0; i++;
          if( zLine[i]!='(' ) break;
          while( (c = zLine[i])!=0 ){
            zArg[nArg++] = c;
            if( c==')' ){
              k--;
              if( k==0 ) break;
            }else if( c=='(' ){
              k++;
            }
            i++;
          }
        }while( --nParam );
      }
      zOut[j] = 0;
      if( !inPrint ){
        fprintf(out,"%*scgi_printf(\"%s%s\"",indent-2,"", zOut, zNewline);
        inPrint = 1;
      }else{
        fprintf(out,"\n%*s\"%s%s\"",indent+5, "", zOut, zNewline);
      }
    }
  }
}

static void print_source_ref(const char *zSrcFile, FILE *out){
/* Set source line reference to the original source file.
 * This makes compiler show the original file name in the compile error
 * messages, instead of referring to the translated file.
 * NOTE: This somewhat complicates stepping in debugger, as the resuling
 * code would not match the referenced sources.
 */
#ifndef FOSSIL_DEBUG
  const char *arg;
  if( !*zSrcFile ){
    return;
  }
  fprintf(out,"#line 1 \"");
  for(arg=zSrcFile; *arg; arg++){
    if( *arg!='\\' ){
      fprintf(out,"%c", *arg);
    }else{
      fprintf(out,"\\\\");
    }
  }
  fprintf(out,"\"\n");
#endif
}

int main(int argc, char **argv){
  if( argc==2 ){
    FILE *in = fopen(argv[1], "r");
    if( in==0 ){
      fprintf(stderr,"can not open %s\n", argv[1]);
      exit(1);
    }
    zInFile = argv[1];
    print_source_ref(zInFile, stdout);
    trans(in, stdout);
    fclose(in);
  }else{
    trans(stdin, stdout);
  }
  return 0;
}