/*
** Copyright (c) 2002 D. Richard Hipp
**
** This program is free software; you can redistribute it and/or
** modify it under the terms of the Simplified BSD License (also
** known as the "2-Clause License" or "FreeBSD License".)
**
** This program is distributed in the hope that it will be useful,
** but without any warranty; without even the implied warranty of
** merchantability or fitness for a particular purpose.
**
** Author contact information:
** drh@hwaci.com
** http://www.hwaci.com/drh/
**
*******************************************************************************
**
** SYNOPSIS:
**
** Input lines that begin with the "@" character are translated into
** either cgi_printf() statements or string literals and the
** translated code is written on standard output.
**
** The problem this program is attempting to solve is as follows: When
** writing CGI programs in C, we typically want to output a lot of HTML
** text to standard output. In pure C code, this involves doing a
** printf() with a big string containing all that text. But we have
** to insert special codes (ex: \n and \") for many common characters,
** which interferes with the readability of the HTML.
**
** This tool allows us to put raw HTML, without the special codes, in
** the middle of a C program. This program then translates the text
** into standard C by inserting all necessary backslashes and other
** punctuation.
**
** Enhancement #1:
**
** If the last non-whitespace character prior to the first "@" of a
** @-block is "=" or "," then the @-block is a string literal initializer
** rather than text that is to be output via cgi_printf(). Render it
** as such.
**
** Enhancement #2:
**
** Comments of the form: "|* @-comment: CC" (where "|" is really "/")
** cause CC to become a comment character for the @-substitution.
** Typical values for CC are "--" (for SQL text) or "#" (for Tcl script)
** or "//" (for C++ code). Lines of subsequent @-blocks that begin with
** CC are omitted from the output.
**
** Enhancement #3:
**
** If a non-enhancement #1 line ends in backslash, the backslash and the
** newline (\n) are not included in the argument to cgi_printf(). This
** is used to split one long output line across multiple source lines.
*/
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
/*
** Space to hold arguments at the end of the cgi_printf().
**
** While a cgi_printf() call is being generated, the translator collects
** the text of its trailing arguments (each one preceded by a comma) in
** zArg[].  nArg is the number of bytes of zArg[] currently in use.
** end_block() writes the collected text followed by ");" and resets
** nArg to zero.
*/
#define MX_ARG_SP 10000
static char zArg[MX_ARG_SP];
static int nArg = 0;
/*
** True if we are currently in a cgi_printf(), that is, the opening of a
** cgi_printf() call has been written but its closing ");" has not.
*/
static int inPrint = 0;
/*
** True if we are currently doing a free string.
**
** NOTE(review): nothing in the visible source ever assigns this flag, so
** it appears to stay zero for the whole run -- confirm before relying on it.
*/
static int inStr = 0;
/*
** Name of the file being processed.  Defaults to "(stdin)"; main() replaces
** it with the input file name when one is given on the command line.
*/
static const char *zInFile = "(stdin)";
/*
** Close the cgi_printf() call that is currently being emitted, if any.
**
** The argument text collected in zArg[] is written to "out", followed by
** the closing ");" and a newline.  The argument buffer is then emptied
** and the inPrint flag is cleared.  When no cgi_printf() is open this
** routine does nothing.
*/
static void end_block(FILE *out){
  if( !inPrint ) return;
  zArg[nArg] = 0;          /* NUL-terminate the collected arguments */
  fputs(zArg, out);
  fputs(");\n", out);
  inPrint = 0;
  nArg = 0;
}
/*
** Append one character to the pending cgi_printf() argument text held in
** zArg[].  One byte is always kept in reserve for the NUL terminator that
** end_block() stores at zArg[nArg].  If the buffer is full, report the
** offending input line on stderr and exit rather than overrun the buffer.
*/
static void append_arg(char c, int lineNo){
  if( nArg>=(int)sizeof(zArg)-1 ){
    fprintf(stderr,
            "translate: line %d: cgi_printf() argument text exceeds %d bytes\n",
            lineNo, (int)sizeof(zArg));
    exit(1);
  }
  zArg[nArg++] = c;
}

/*
** Translate the input stream into the output stream.
**
** Input is read from "in" one line at a time and the translation is
** written to "out":
**
**   *  A line whose first non-whitespace character is not "@" is copied
**      through unchanged, after closing any cgi_printf() that is open.
**      A line that starts with an "@-comment: CC" directive comment sets
**      the two comment characters c1 and c2 from the two bytes that follow
**      the directive prefix.
**
**   *  An "@" line that follows a line ending in "=" or "," becomes a
**      C string literal.
**
**   *  Any other "@" line becomes (part of) a cgi_printf() call.  The
**      "(...)" groups that follow a "%" conversion are moved out of the
**      format string and into the argument list.
**
** A cgi_printf() call that is still open when the input ends is closed
** before returning.
*/
static void trans(FILE *in, FILE *out){
  int i, j, k;          /* Loop counters */
  char c1, c2;          /* Characters used to start a comment */
  int lastWasEq = 0;    /* True if last non-whitespace character was "=" */
  int lastWasComma = 0; /* True if last non-whitespace character was "," */
  int lineNo = 0;       /* Line number */
  char zLine[2000];     /* A single line of input */
  char zOut[4000];      /* The input line translated into appropriate output.
                        ** Each input byte expands to at most two output
                        ** bytes, so twice the size of zLine[] is enough. */

  c1 = c2 = '-';
  while( fgets(zLine, sizeof(zLine), in) ){
    lineNo++;
    /* The <ctype.h> classifiers require an argument that is representable
    ** as unsigned char; plain char may be negative for non-ASCII bytes. */
    for(i=0; zLine[i] && isspace((unsigned char)zLine[i]); i++){}
    if( zLine[i]!='@' ){
      if( inPrint || inStr ) end_block(out);
      fprintf(out,"%s",zLine);
      /* The directive prefix is 14 characters long, so the comment
      ** characters are found at offsets 14 and 15. */
      if( strncmp(zLine, "/* @-comment: ", 14)==0 ){
        c1 = zLine[14];
        c2 = zLine[15];
      }
      /* Remember whether this line ends (ignoring trailing whitespace)
      ** in "=" or ",".  That decides how a following @-block is rendered. */
      i += strlen(&zLine[i]);
      while( i>0 && isspace((unsigned char)zLine[i-1]) ){ i--; }
      lastWasEq    = i>0 && zLine[i-1]=='=';
      lastWasComma = i>0 && zLine[i-1]==',';
    }else if( lastWasEq || lastWasComma){
      /* If the last non-whitespace character before the first @ was
      ** an "="(var init/set) or a ","(const definition in list) then
      ** generate a string literal.  But skip comments
      ** consisting of all text between c1 and c2 (default "--")
      ** and end of line.
      */
      int indent, omitline;
      const char *zNewline = "\\n";
      i++;
      if( isspace((unsigned char)zLine[i]) ){ i++; }
      indent = i - 2;
      if( indent<0 ) indent = 0;
      omitline = 0;
      for(j=0; zLine[i] && zLine[i]!='\r' && zLine[i]!='\n'; i++){
        if( zLine[i]==c1 && (c2==' ' || zLine[i+1]==c2) ){
          omitline = 1; break;
        }
        /* A backslash at the very end of the line suppresses the "\n" */
        if( zLine[i]=='\\' && (zLine[i+1]==0 || zLine[i+1]=='\r'
                                 || zLine[i+1]=='\n') ){
          zLine[i] = 0;
          zNewline = "";
          break;
        }
        if( zLine[i]=='\\' || zLine[i]=='"' ){ zOut[j++] = '\\'; }
        zOut[j++] = zLine[i];
      }
      /* Trailing whitespace is trimmed unless the line was continued */
      if( zNewline[0] ){
        while( j>0 && isspace((unsigned char)zOut[j-1]) ){ j--; }
      }
      zOut[j] = 0;
      if( j<=0 && omitline ){
        fprintf(out,"\n");
      }else{
        fprintf(out,"%*s\"%s%s\"\n",indent, "", zOut, zNewline);
      }
    }else{
      /* Otherwise (if the last non-whitespace was not '=') then generate a
      ** cgi_printf() statement whose format is the text following the '@'.
      ** Substrings of the form "%C(...)" (where C is any sequence of characters
      ** other than \000 and '(') will put "%C" in the format and add the
      ** "(...)" as an argument to the cgi_printf call.  Each '*' character
      ** present in C (max two) causes one more "(...)" sequence to be consumed.
      ** For example, "%*.*d(4)(2)(1)" converts to "%*.*d" with arguments "4",
      ** "2", and "1", which will be used as the field width, precision, and
      ** value, respectively, producing a final formatted result of "  01".
      */
      const char *zNewline = "\\n";
      int indent;
      int nC;
      int nParam;
      char c;
      i++;
      if( isspace((unsigned char)zLine[i]) ){ i++; }
      indent = i;
      for(j=0; zLine[i] && zLine[i]!='\r' && zLine[i]!='\n'; i++){
        /* A backslash at the very end of the line suppresses the "\n" */
        if( zLine[i]=='\\' && (!zLine[i+1] || zLine[i+1]=='\r'
                                 || zLine[i+1]=='\n') ){
          zNewline = "";
          break;
        }
        if( zLine[i]=='"' || zLine[i]=='\\' ){ zOut[j++] = '\\'; }
        zOut[j++] = zLine[i];
        if( zLine[i]!='%' || zLine[i+1]=='%' || zLine[i+1]==0 ) continue;
        /* Find the "(" that ends the conversion, counting "*" fields */
        nParam=1;
        for(nC=1; zLine[i+nC] && zLine[i+nC]!='('; nC++){
          if( zLine[i+nC]=='*' && nParam < 3 ) nParam++;
        }
        if( zLine[i+nC]!='('
         || !isalpha((unsigned char)zLine[i+nC-1])
        ){
          continue;
        }
        /* Copy the rest of the conversion spec into the format string */
        while( --nC ) zOut[j++] = zLine[++i];
        /* Move each "(...)" group into the argument list */
        do{
          append_arg(',', lineNo);
          k = 0; i++;
          if( zLine[i]!='(' ){
            /* Expected another "(...)" group but found none.  Step back so
            ** that the outer loop resumes at this character instead of
            ** skipping it (or stepping past the end of the line). */
            i--;
            break;
          }
          while( (c = zLine[i])!=0 ){
            append_arg(c, lineNo);
            if( c==')' ){
              k--;
              if( k==0 ) break;
            }else if( c=='(' ){
              k++;
            }
            i++;
          }
          if( c==0 ){
            /* Unbalanced "(": the scan ran into the end of the line.  Back
            ** up one so the outer loop's i++ lands on the terminator
            ** rather than beyond it. */
            i--;
            break;
          }
        }while( --nParam );
      }
      zOut[j] = 0;
      if( !inPrint ){
        fprintf(out,"%*scgi_printf(\"%s%s\"",indent-2,"", zOut, zNewline);
        inPrint = 1;
      }else{
        fprintf(out,"\n%*s\"%s%s\"",indent+5, "", zOut, zNewline);
      }
    }
  }
  /* Do not leave a cgi_printf() unterminated if the input ends inside
  ** an @-block. */
  if( inPrint || inStr ) end_block(out);
}
/*
** Write a "#line 1" directive that names zSrcFile to "out".
**
** This makes the compiler show the original file name in its error
** messages instead of referring to the translated file.
** NOTE: This somewhat complicates stepping in a debugger, as the resulting
** code does not match the referenced sources.  For that reason nothing
** is written when FOSSIL_DEBUG is defined.
**
** Nothing is written when zSrcFile is NULL or empty.  Backslashes and
** double quotes in the name are preceded by a backslash so that the name
** remains a valid string literal inside the directive.
*/
static void print_source_ref(const char *zSrcFile, FILE *out){
#ifndef FOSSIL_DEBUG
  const char *arg;
  if( zSrcFile==0 || !*zSrcFile ){
    return;
  }
  fputs("#line 1 \"", out);
  for(arg=zSrcFile; *arg; arg++){
    if( *arg=='\\' || *arg=='"' ){
      fputc('\\', out);
    }
    fputc(*arg, out);
  }
  fputs("\"\n", out);
#else
  (void)zSrcFile;
  (void)out;
#endif
}
/*
** Program entry point.
**
** With exactly one command-line argument, that argument names the input
** file: a source-line reference to it is written first and then its
** translation, both on standard output.  With any other argument count,
** standard input is translated to standard output.
*/
int main(int argc, char **argv){
  FILE *src;

  if( argc!=2 ){
    trans(stdin, stdout);
    return 0;
  }

  src = fopen(argv[1], "r");
  if( src==0 ){
    fprintf(stderr,"can not open %s\n", argv[1]);
    exit(1);
  }
  zInFile = argv[1];
  print_source_ref(zInFile, stdout);
  trans(src, stdout);
  fclose(src);
  return 0;
}