|
|
|
@ -1,5 +1,3 @@ |
|
|
|
/* Generated by re2c 0.5 on Sat Nov 27 16:22:34 1999 */ |
|
|
|
#line 1 "../../../php4/ext/standard/url_scanner.re" |
|
|
|
/* |
|
|
|
+----------------------------------------------------------------------+ |
|
|
|
| PHP version 4.0 | |
|
|
|
@ -14,7 +12,8 @@ |
|
|
|
| obtain it through the world-wide-web, please send a note to | |
|
|
|
| license@php.net so we can mail you a copy immediately. | |
|
|
|
+----------------------------------------------------------------------+ |
|
|
|
| Authors: Sascha Schumann <sascha@schumann.cx>                        |
|          Hartmut Holzgraefe <hartmut@six.de>                         |
|
|
|
+----------------------------------------------------------------------+ |
|
|
|
*/ |
|
|
|
/* $Id$ */ |
|
|
|
@ -27,518 +26,270 @@ |
|
|
|
#include <stdio.h> |
|
|
|
#include <stdlib.h> |
|
|
|
#include <string.h> |
|
|
|
#include "php.h" |
|
|
|
#include "basic_functions.h" |
|
|
|
#include "url_scanner.h" |
|
|
|
|
|
|
|
/*
 * MIN(a, b): the smaller of a and b.
 *
 * The whole expansion is parenthesized so the macro can be used inside a
 * larger expression (e.g. MIN(x, y) + 1) without the conditional operator
 * absorbing the neighbouring operands. Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#undef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
|
|
|
|
|
|
|
#define YYCTYPE char |
|
|
|
#define YYCURSOR state->crs |
|
|
|
#define YYLIMIT state->end |
|
|
|
#define YYMARKER state->ptr |
|
|
|
#define YYFILL(n) |
|
|
|
|
|
|
|
/* Scanner start conditions. */
typedef enum {
	INITIAL = 0,	/* copying ordinary input through to the output */
	REF = 1		/* inside an attribute value that holds a URL */
} state;

/* All state shared by the scanner and its helper routines. */
typedef struct {
	state state;		/* current start condition */
	const char *crs;	/* read cursor (YYCURSOR) */
	const char *end;	/* end of the input (YYLIMIT) */
	const char *ptr;	/* backtracking marker (YYMARKER) */
	const char *start;	/* first input byte not yet copied to target */
	char *target;		/* output buffer, grown by ATTACH() */
	size_t targetsize;	/* number of bytes currently held in target */
	const char *data;	/* text appended to every rewritten URL */
} lexdata;
|
|
|
|
|
|
|
/* Copy the pending input to the output and leave the scanner loop. */
#define FINISH { catchup(state); goto finish; }

/* Initial size and growth step of the tag/attribute/value buffers. */
#define BUFSIZE 256

/*
 * Switch the scanner to start condition x.
 *
 * The text consumed since state->start is emitted first: verbatim via
 * catchup() when leaving INITIAL, rewritten via screw_url() when leaving
 * REF. The pending region is then restarted at the cursor and control
 * jumps to the nextiter label of the scanner loop.
 */
#define BEGIN(x) \
	if (state->state == INITIAL) { \
		catchup(state); \
	} else if (state->state == REF) { \
		screw_url(state); \
	} \
	state->state = x; \
	state->start = state->crs; \
	goto nextiter
|
|
|
PHP_RINIT_FUNCTION(url_scanner) { |
|
|
|
url_adapt(NULL,0,NULL,NULL); |
|
|
|
} |
|
|
|
|
|
|
|
#define ATTACH(s, n) \ |
|
|
|
{ \ |
|
|
|
size_t _newlen = state->targetsize + n; \ |
|
|
|
state->target = realloc(state->target, _newlen + 1); \ |
|
|
|
memcpy(state->target + state->targetsize, s, n); \ |
|
|
|
state->targetsize = _newlen; \ |
|
|
|
state->target[_newlen] = '\0'; \ |
|
|
|
PHP_RSHUTDOWN_FUNCTION(url_scanner) { |
|
|
|
url_adapt(NULL,0,NULL,NULL); |
|
|
|
} |
|
|
|
|
|
|
|
#define URLLEN 512 |
|
|
|
|
|
|
|
static void screw_url(lexdata *state) |
|
|
|
{ |
|
|
|
int len; |
|
|
|
char buf[URLLEN]; |
|
|
|
char url[URLLEN]; |
|
|
|
const char *p, *q; |
|
|
|
char c; |
|
|
|
|
|
|
|
|
|
|
|
/* search outer limits for URI */ |
|
|
|
for(p = state->start; p < state->crs && (c = *p); p++) |
|
|
|
if(!isspace(c)) break; |
|
|
|
if(c=='"') p++; |
|
|
|
for(; p < state->crs && (c = *p); p++) |
|
|
|
if(!isspace(c)) break; |
|
|
|
/*
 * Decide whether an attribute value is a URL that should carry the extra
 * query data and, if so, build the text to append to it.
 *
 *   tag  - name of the tag being scanned (compared case-insensitively)
 *   attr - name of the attribute inside that tag (case-insensitive)
 *   val  - the attribute's current value
 *   buf  - the data to add to the URL
 *
 * Returns a newly emalloc()ed string made of '?' (or '&' when val already
 * contains a '?') followed by buf; the caller releases it with efree().
 * Returns NULL when the tag/attribute pair is not a known URL attribute,
 * when val already contains buf, or when any argument is NULL.
 */
static char *url_attr_addon(const char *tag,const char *attr,const char *val,const char *buf) {
	/* tag/attribute pairs whose value is a URL */
	static const struct {
		const char *tag;
		const char *attr;
	} url_attrs[] = {
		{ "a",     "href"   },
		{ "area",  "href"   },
		{ "form",  "action" },
		{ "frame", "src"    },	/* was "source", which FRAME does not have */
		{ "img",   "src"    }	/* was "action", which IMG does not have */
	};
	const size_t count = sizeof(url_attrs) / sizeof(url_attrs[0]);
	size_t i;
	char *p;

	if (tag == NULL || attr == NULL || val == NULL || buf == NULL) {
		return NULL;
	}

	for (i = 0; i < count; i++) {
		if (!strcasecmp(tag, url_attrs[i].tag) && !strcasecmp(attr, url_attrs[i].attr)) {
			break;
		}
	}
	if (i == count) {
		return NULL;	/* not an attribute that holds a URL */
	}

	if (strstr(val, buf)) {
		return NULL;	/* the data is already part of the URL */
	}

	/* one byte for the separator, one for the terminating NUL */
	p = emalloc(strlen(buf) + 2);
	*p = strchr(val, '?') ? '&' : '?';
	strcpy(p + 1, buf);
	return p;
}
|
|
|
|
|
|
|
for(q = state->crs; q > state->start && (c = *(q-1)); q--) |
|
|
|
if(!isspace(c)) break; |
|
|
|
if(c=='"') q--; |
|
|
|
for(; q > state->start && (c = *(q-1)); q--) |
|
|
|
if(!isspace(c)) break; |
|
|
|
#define US BG(url_adapt_state) |
|
|
|
|
|
|
|
if(q<p) { p=state->start; q=state->crs; } |
|
|
|
char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen) |
|
|
|
{ |
|
|
|
char *out,*outp; |
|
|
|
int maxl,n,no_output; |
|
|
|
|
|
|
|
/* attach beginning */ |
|
|
|
ATTACH(state->start, p-state->start); |
|
|
|
|
|
|
|
/* copy old URI */ |
|
|
|
len = MIN(q - p, sizeof(buf) - 1); |
|
|
|
if(src==NULL) { |
|
|
|
US.state=STATE_NORMAL; |
|
|
|
if(US.tag) {efree(US.tag); US.tag =NULL; } |
|
|
|
if(US.attr) {efree(US.attr); US.attr=NULL; } |
|
|
|
if(US.val) {efree(US.val); US.val =NULL; } |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
|
|
|
|
memcpy(url, p, len); |
|
|
|
url[len] = '\0'; |
|
|
|
|
|
|
|
/* construct new URI */ |
|
|
|
len = snprintf(buf, sizeof(buf), "%s%c%s", url, |
|
|
|
memchr(state->start, '?', len) ? '&' : '?', |
|
|
|
state->data); |
|
|
|
if(srclen==0) |
|
|
|
srclen=strlen(src); |
|
|
|
|
|
|
|
/* attach new URI */ |
|
|
|
ATTACH(buf, len); |
|
|
|
|
|
|
|
/* attach rest */ |
|
|
|
ATTACH(q, state->crs - q); |
|
|
|
} |
|
|
|
out=malloc(srclen+1); |
|
|
|
maxl=srclen; |
|
|
|
n=srclen; |
|
|
|
no_output=0; |
|
|
|
|
|
|
|
/*
 * Append the input that has been scanned but not yet copied, i.e. the bytes
 * from state->start up to (not including) state->crs, to the output buffer.
 */
static void catchup(lexdata *state)
{
	const char *pending = state->start;
	size_t pending_len = state->crs - state->start;

	ATTACH(pending, pending_len);
}
|
|
|
*newlen=0; |
|
|
|
outp=out; |
|
|
|
|
|
|
|
#line 144 |
|
|
|
while(n--) { |
|
|
|
switch(US.state) { |
|
|
|
case STATE_NORMAL: |
|
|
|
if(*src=='<') |
|
|
|
US.state=STATE_TAG_START; |
|
|
|
break; |
|
|
|
|
|
|
|
case STATE_TAG_START: |
|
|
|
if(! isalnum(*src)) |
|
|
|
US.state=STATE_NORMAL; |
|
|
|
US.state=STATE_TAG; |
|
|
|
US.ml=BUFSIZE; |
|
|
|
US.p=US.tag=erealloc(US.tag,US.ml); |
|
|
|
*(US.p)++=*src; |
|
|
|
US.l=1; |
|
|
|
break; |
|
|
|
|
|
|
|
static void url_scanner(lexdata *state) |
|
|
|
{ |
|
|
|
while(state->crs < state->end) { |
|
|
|
|
|
|
|
switch(state->state) { |
|
|
|
case INITIAL: |
|
|
|
{ |
|
|
|
YYCTYPE yych; |
|
|
|
unsigned int yyaccept; |
|
|
|
goto yy0; |
|
|
|
yy1: ++YYCURSOR; |
|
|
|
yy0: |
|
|
|
if((YYLIMIT - YYCURSOR) < 7) YYFILL(7); |
|
|
|
yych = *YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case '\000': goto yy7; |
|
|
|
case '<': goto yy2; |
|
|
|
default: goto yy4; |
|
|
|
} |
|
|
|
yy2: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'A': case 'a': goto yy9; |
|
|
|
case 'F': case 'f': goto yy10; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy3:yy4: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy5: switch(yych){ |
|
|
|
case '\000': case '<': goto yy6; |
|
|
|
default: goto yy4; |
|
|
|
} |
|
|
|
yy6: |
|
|
|
#line 157 |
|
|
|
{ BEGIN(INITIAL); } |
|
|
|
yy7: yych = *++YYCURSOR; |
|
|
|
yy8: |
|
|
|
#line 158 |
|
|
|
{ FINISH; } |
|
|
|
yy9: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'H': case 'h': goto yy3; |
|
|
|
case 'R': case 'r': goto yy41; |
|
|
|
default: goto yy40; |
|
|
|
} |
|
|
|
yy10: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'O': case 'o': goto yy12; |
|
|
|
case 'R': case 'r': goto yy11; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy11: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'A': case 'a': goto yy27; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy12: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'R': case 'r': goto yy13; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy13: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'M': case 'm': goto yy14; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy14: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'A': case 'a': goto yy3; |
|
|
|
default: goto yy16; |
|
|
|
} |
|
|
|
yy15: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy16: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy15; |
|
|
|
case 'A': case 'a': goto yy17; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy17: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'C': case 'c': goto yy18; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy18: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'T': case 't': goto yy19; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy19: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'I': case 'i': goto yy20; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy20: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'O': case 'o': goto yy21; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy21: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'N': case 'n': goto yy22; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy22: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy23: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy22; |
|
|
|
case '=': goto yy24; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy24: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy25: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy24; |
|
|
|
default: goto yy26; |
|
|
|
} |
|
|
|
yy26: |
|
|
|
#line 155 |
|
|
|
{ BEGIN(REF); } |
|
|
|
yy27: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'M': case 'm': goto yy28; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy28: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'E': case 'e': goto yy29; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy29: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'S': case 's': goto yy3; |
|
|
|
default: goto yy31; |
|
|
|
} |
|
|
|
yy30: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy31: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy30; |
|
|
|
case 'S': case 's': goto yy32; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy32: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'R': case 'r': goto yy33; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy33: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'C': case 'c': goto yy34; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy34: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy35: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy34; |
|
|
|
case '=': goto yy36; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy36: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy37: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy36; |
|
|
|
default: goto yy38; |
|
|
|
} |
|
|
|
yy38: |
|
|
|
#line 153 |
|
|
|
{ BEGIN(REF); } |
|
|
|
yy39: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy40: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy39; |
|
|
|
case 'H': case 'h': goto yy54; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy41: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'E': case 'e': goto yy42; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy42: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'A': case 'a': goto yy43; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy43: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'H': case 'h': goto yy3; |
|
|
|
default: goto yy45; |
|
|
|
} |
|
|
|
yy44: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy45: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy44; |
|
|
|
case 'H': case 'h': goto yy46; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy46: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'R': case 'r': goto yy47; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy47: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'E': case 'e': goto yy48; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy48: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'F': case 'f': goto yy49; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy49: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy50: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy49; |
|
|
|
case '=': goto yy51; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy51: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy52: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy51; |
|
|
|
default: goto yy53; |
|
|
|
} |
|
|
|
yy53: |
|
|
|
#line 156 |
|
|
|
{ BEGIN(REF); } |
|
|
|
yy54: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'R': case 'r': goto yy55; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy55: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'E': case 'e': goto yy56; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy56: yych = *++YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case 'F': case 'f': goto yy57; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy57: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy58: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy57; |
|
|
|
case '=': goto yy59; |
|
|
|
default: goto yy3; |
|
|
|
} |
|
|
|
yy59: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy60: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy59; |
|
|
|
default: goto yy61; |
|
|
|
} |
|
|
|
yy61: |
|
|
|
#line 154 |
|
|
|
{ BEGIN(REF); } |
|
|
|
} |
|
|
|
#line 159 |
|
|
|
case STATE_TAG: |
|
|
|
if(isalnum(*src)) { |
|
|
|
*(US.p)++ = *src; |
|
|
|
US.l++; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.tag=erealloc(US.tag,US.ml); |
|
|
|
US.p = US.tag+US.l; |
|
|
|
} |
|
|
|
} else if (isspace(*src)) { |
|
|
|
US.state = STATE_IN_TAG; |
|
|
|
*US.p='\0'; |
|
|
|
US.tag=erealloc(US.tag,US.l); |
|
|
|
} else { |
|
|
|
US.state = STATE_NORMAL; |
|
|
|
efree(US.tag); |
|
|
|
US.tag=NULL; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case STATE_IN_TAG: |
|
|
|
if(isalnum(*src)) { |
|
|
|
US.state=STATE_TAG_ATTR; |
|
|
|
US.ml=BUFSIZE; |
|
|
|
US.p=US.attr=erealloc(US.attr,US.ml); |
|
|
|
*(US.p)++=*src; |
|
|
|
US.l=1; |
|
|
|
} else if (! isspace(*src)) { |
|
|
|
US.state = STATE_NORMAL; |
|
|
|
efree(US.tag); |
|
|
|
US.tag=NULL; |
|
|
|
} |
|
|
|
break; |
|
|
|
case REF: |
|
|
|
{ |
|
|
|
YYCTYPE yych; |
|
|
|
unsigned int yyaccept; |
|
|
|
goto yy62; |
|
|
|
yy63: ++YYCURSOR; |
|
|
|
yy62: |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
switch(yych){ |
|
|
|
case '\000': case '>': goto yy64; |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': case '"': goto yy65; |
|
|
|
case '#': goto yy69; |
|
|
|
case ':': goto yy71; |
|
|
|
default: goto yy67; |
|
|
|
} |
|
|
|
yy64: |
|
|
|
#line 163 |
|
|
|
{ BEGIN(INITIAL); } |
|
|
|
yy65: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy66: switch(yych){ |
|
|
|
case '\000': case '>': goto yy64; |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy65; |
|
|
|
case '"': goto yy79; |
|
|
|
case '#': goto yy69; |
|
|
|
case ':': goto yy71; |
|
|
|
default: goto yy67; |
|
|
|
} |
|
|
|
yy67: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy68: switch(yych){ |
|
|
|
case '\000': case '>': goto yy64; |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy77; |
|
|
|
case '"': goto yy79; |
|
|
|
case '#': goto yy69; |
|
|
|
case ':': goto yy71; |
|
|
|
default: goto yy67; |
|
|
|
} |
|
|
|
yy69: yych = *++YYCURSOR; |
|
|
|
yy70: YYCURSOR -= 1; |
|
|
|
#line 164 |
|
|
|
{ BEGIN(INITIAL); } |
|
|
|
yy71: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy72: switch(yych){ |
|
|
|
case '\000': case '#': case '>': goto yy73; |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy74; |
|
|
|
case '"': goto yy76; |
|
|
|
default: goto yy71; |
|
|
|
} |
|
|
|
yy73: |
|
|
|
#line 165 |
|
|
|
{ |
|
|
|
/* don't modify absolute links */ |
|
|
|
state->state = INITIAL; BEGIN(INITIAL); |
|
|
|
} |
|
|
|
yy74: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy75: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy74; |
|
|
|
case '"': goto yy76; |
|
|
|
default: goto yy73; |
|
|
|
} |
|
|
|
yy76: yych = *++YYCURSOR; |
|
|
|
goto yy73; |
|
|
|
yy77: ++YYCURSOR; |
|
|
|
if(YYLIMIT == YYCURSOR) YYFILL(1); |
|
|
|
yych = *YYCURSOR; |
|
|
|
yy78: switch(yych){ |
|
|
|
case '\t': case '\v': |
|
|
|
case '\f': case ' ': goto yy77; |
|
|
|
case '"': goto yy79; |
|
|
|
default: goto yy64; |
|
|
|
} |
|
|
|
yy79: yych = *++YYCURSOR; |
|
|
|
goto yy64; |
|
|
|
} |
|
|
|
#line 169 |
|
|
|
|
|
|
|
break; |
|
|
|
} |
|
|
|
nextiter: |
|
|
|
; |
|
|
|
} |
|
|
|
finish: |
|
|
|
; |
|
|
|
} |
|
|
|
case STATE_TAG_ATTR: |
|
|
|
if(isalnum(*src)) { |
|
|
|
*US.p++=*src; |
|
|
|
++US.l; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.attr=erealloc(US.attr,US.ml); |
|
|
|
US.p = US.attr+US.l; |
|
|
|
} |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.attr=erealloc(US.attr,US.ml); |
|
|
|
US.p = US.attr+US.l; |
|
|
|
} |
|
|
|
} else if(isspace(*src)||(*src=='=')){ |
|
|
|
US.state=STATE_TAG_IS; |
|
|
|
*US.p=0; |
|
|
|
US.attr=erealloc(US.attr,US.l); |
|
|
|
} else if(*src=='>') { |
|
|
|
US.state=STATE_NORMAL; |
|
|
|
} else { |
|
|
|
efree(US.attr); |
|
|
|
US.attr=NULL; |
|
|
|
US.state=STATE_IN_TAG; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
case STATE_TAG_IS: |
|
|
|
case STATE_TAG_IS2: |
|
|
|
if(!isspace(*src)) { |
|
|
|
US.ml=BUFSIZE; |
|
|
|
US.p=US.val=erealloc(US.val,US.ml); |
|
|
|
US.l=0; |
|
|
|
if((*src=='"')||(*src=='\'')) { |
|
|
|
US.state=STATE_TAG_QVAL2; |
|
|
|
US.delim=*src; |
|
|
|
} else { |
|
|
|
US.state=STATE_TAG_VAL; |
|
|
|
*US.p++=*src; |
|
|
|
US.l++; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
char *url_adapt(const char *src, size_t srclen, const char *data, size_t *newlen) |
|
|
|
{ |
|
|
|
lexdata state; |
|
|
|
|
|
|
|
state.state = INITIAL; |
|
|
|
state.start = state.crs = src; |
|
|
|
state.end = src + srclen; |
|
|
|
state.ptr = NULL; |
|
|
|
state.target = NULL; |
|
|
|
state.targetsize = 0; |
|
|
|
state.data = data; |
|
|
|
case STATE_TAG_QVAL2: |
|
|
|
if(*src==US.delim) { |
|
|
|
char *p; |
|
|
|
US.state=STATE_IN_TAG; |
|
|
|
*US.p='\0'; |
|
|
|
p=url_attr_addon(US.tag,US.attr,US.val,data); |
|
|
|
if(p) { |
|
|
|
int l= strlen(p); |
|
|
|
maxl+=l; |
|
|
|
out=realloc(out,maxl); |
|
|
|
outp=out+*newlen; |
|
|
|
strcpy(outp,p); |
|
|
|
outp+=l; |
|
|
|
*newlen+=l; |
|
|
|
efree(p); |
|
|
|
} |
|
|
|
break; |
|
|
|
} else if(*src=='\\') { |
|
|
|
no_output=1; |
|
|
|
US.state=STATE_TAG_QVAL2b; |
|
|
|
} else if (*src=='>') { |
|
|
|
US.state=STATE_NORMAL; |
|
|
|
} |
|
|
|
|
|
|
|
*US.p++=*src; |
|
|
|
++US.l; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.val=erealloc(US.val,US.ml); |
|
|
|
US.p = US.val+US.l; |
|
|
|
} |
|
|
|
|
|
|
|
break; |
|
|
|
|
|
|
|
case STATE_TAG_QVAL2b: |
|
|
|
US.state=STATE_TAG_QVAL2; |
|
|
|
*US.p++=*src; |
|
|
|
++US.l; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.val=erealloc(US.val,US.ml); |
|
|
|
US.p = US.val+US.l; |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
url_scanner(&state); |
|
|
|
case STATE_TAG_VAL: |
|
|
|
if(!isspace(*src)) { |
|
|
|
if((*src=='"')||(*src=='\'')) { |
|
|
|
US.state=STATE_TAG_QVAL2; |
|
|
|
US.delim=*src; |
|
|
|
} else { |
|
|
|
*US.p++=*src; |
|
|
|
US.l++; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.val=erealloc(US.val,US.ml); |
|
|
|
US.p = US.val+US.l; |
|
|
|
} |
|
|
|
US.state=STATE_TAG_VAL2; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
|
|
|
|
if(newlen) *newlen = state.targetsize; |
|
|
|
case STATE_TAG_VAL2: |
|
|
|
if(isspace(*src)||(*src=='>')) { |
|
|
|
char *p; |
|
|
|
US.state=(*src=='>')?STATE_NORMAL:STATE_IN_TAG; |
|
|
|
*US.p='\0'; |
|
|
|
p=url_attr_addon(US.tag,US.attr,US.val,data); |
|
|
|
if(p) { |
|
|
|
int l= strlen(p); |
|
|
|
maxl+=l; |
|
|
|
out=realloc(out,maxl); |
|
|
|
outp=out+*newlen; |
|
|
|
strcpy(outp,p); |
|
|
|
outp+=l; |
|
|
|
*newlen+=l; |
|
|
|
efree(p); |
|
|
|
} |
|
|
|
} else { |
|
|
|
*US.p++=*src; |
|
|
|
US.l++; |
|
|
|
if(US.l==US.ml) { |
|
|
|
US.ml+=BUFSIZE; |
|
|
|
US.val=erealloc(US.val,US.ml); |
|
|
|
US.p = US.val+US.l; |
|
|
|
} |
|
|
|
} |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
return state.target; |
|
|
|
if(no_output) { |
|
|
|
src++; |
|
|
|
no_output=0; |
|
|
|
continue; |
|
|
|
} |
|
|
|
*outp++=*src++; |
|
|
|
*newlen+=1; |
|
|
|
} |
|
|
|
*outp='\0'; |
|
|
|
return out; |
|
|
|
} |
|
|
|
|
|
|
|
#endif |