[top-snip]
nice! I'll give it a look this morning
[/snip]

This is another rather hackish attempt at using the tokeniser. It turns this:

<?php
$emu = "my_column {$banana}";
$wallaby = 'my_table';
$kookaburra = 'SELECT * FROM';
$kookaburra .= $wallaby;
$koala = 'ASC';
$taipan = ' ORDER BY' . $emu;
$dropBear = 'group by something';
mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala " . strtoupper($dropBear));
?>

into this:

SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something)

Which isn't perfect by a long shot, but I'm away home now so it'll have to do. I'd be interested to know what it makes of your queries.

-robin

<?php
// Looks at all $dir/*.php files.
$dir = '/path/to/php/files';

foreach (getPhpFileList($dir) as $file) {
    print "===== $file =====\n";
    $token = tokeniseFile($file);

    // first find all calls to mysql_query()
    $mysqlCalls = getMysqlQueryCalls($token);
    foreach ($mysqlCalls as $range) {
        $sql = resolveExpression($token, $range[0], $range[0], $range[1]);
        echo trim(preg_replace('/\s+/', ' ', $sql)), "\n"; // tidy it a little
    }
}

/**
 * Collects the argument token ranges of every mysql_query(...) call.
 *
 * @param array $token Token list as produced by token_get_all().
 * @return array List of array(firstArgIndex, closingBracketIndex) pairs;
 *               an empty array when no call is found.
 */
function getMysqlQueryCalls($token)
{
    $callList = array();

    for ($i = 0; isset($token[$i]); $i++) {
        // not interested in atomic (single character) tokens
        if (!is_array($token[$i])) {
            continue;
        }
        if (strtolower($token[$i][1]) !== 'mysql_query') {
            continue;
        }

        $args = getMysqlQueryArguments($i, $token);
        if ($args !== false) {
            // Append to the list we initialised above, so that a file without
            // any mysql_query() call yields an empty array, not an undefined
            // variable.
            $callList[] = $args;
        }
    }

    return $callList;
}

/**
 * Debugging helper: returns a slice of the token list in which the numeric
 * token ids have been replaced by their token_name() strings.
 *
 * @param array    $token Token list as produced by token_get_all().
 * @param int      $start Index of the first token of the slice.
 * @param int|null $end   Index one past the last token; null means "to the end".
 * @return array The renamed slice (re-indexed from 0 by array_slice()).
 */
function nameTokens($token, $start = 0, $end = null)
{
    if (is_null($end)) {
        $end = count($token);
    }

    $range = array_slice($token, $start, $end - $start);
    foreach ($range as $key => $tok) {
        if (is_array($tok)) {
            $range[$key][0] = token_name($tok[0]);
        }
    }

    return $range;
}

/**
 * Resolves a single token to the text it contributes to the query.
 *
 * Variables are replaced by the right hand side of their most recent
 * assignment found before $cursor (recursively); anything that cannot be
 * traced is left in as "$name".
 *
 * @param array $token  Token list as produced by token_get_all().
 * @param int   $cursor Index before which variable definitions are searched.
 * @param int   $i      Index of the token to resolve.
 * @return string
 */
function resolveToken($token, $cursor, $i)
{
    if (!is_array($token[$i])) {
        // Atomic tokens: brackets are kept, everything else (quotes, dots,
        // commas, ...) is dropped.
        if ($token[$i] === '(' || $token[$i] === ')') {
            return $token[$i];
        }
        return '';
    }

    switch ($token[$i][0]) {
        case T_DOLLAR_OPEN_CURLY_BRACES:
        case T_CURLY_OPEN:
            // the "${" / "{$" markers themselves contribute nothing
            return '';

        case T_STRING:
        case T_WHITESPACE:
        case T_ENCAPSED_AND_WHITESPACE:
            return $token[$i][1];

        case T_CONSTANT_ENCAPSED_STRING:
            // NOTE(review): eval() is used to strip the quotes and decode the
            // escapes of a string literal taken from the scanned file. The
            // tokenizer classifies only non-interpolated literals as this
            // token type, but reconsider before running this on untrusted
            // source files.
            return eval("return {$token[$i][1]};");

        case T_VARIABLE:
        case T_STRING_VARNAME:
            $def = findLastDefinition($token, $cursor, $i);
            if ($def === false) {
                // can't find anything else to replace $var with
                // presume it's defined elsewhere or we're not clever
                // enough to find it.
                return preg_replace('/^\$*/', '$', $token[$i][1]);
            }

            $value = resolveExpression($token, $def[0], $def[1], $def[2]);
            if ($def[3] === '.=') {
                // an append: prefix whatever the variable held before it
                return resolveToken($token, $def[0], $i) . $value;
            }
            return $value;

        default:
            // unhandled token kinds are made visible as (TOKEN_NAME:text)
            return '(' . token_name($token[$i][0]) . ':' . $token[$i][1] . ')';
    }
}

/**
 * Resolves every token in [$start, $end) and concatenates the results.
 *
 * @param array $token  Token list as produced by token_get_all().
 * @param int   $cursor Index before which variable definitions are searched.
 * @param int   $start  Index of the first token of the expression.
 * @param int   $end    Index one past the last token of the expression.
 * @return string
 */
function resolveExpression($token, $cursor, $start, $end)
{
    $output = '';
    for ($i = $start; $i < $end; $i++) {
        $output .= resolveToken($token, $cursor, $i);
    }
    return $output;
}

/**
 * Finds the closest assignment ("=" or ".=") before index $i whose left hand
 * side is the variable named by token $id.
 *
 * @param array $token Token list as produced by token_get_all().
 * @param int   $i     Index to rewind from (exclusive).
 * @param int   $id    Index of the variable token being looked up.
 * @return array|false array(operatorIndex, rhsStart, rhsEnd, operatorText),
 *                     or false when no usable definition is found.
 */
function findLastDefinition($token, $i, $id)
{
    // make sure all variables are in the form $name as ${name} ones are
    // just 'name' by the time they end up here.
    $name = preg_replace('/^\$*/', '$', $token[$id][1]);

    // rewind until we hit an assignment or run out of tokens
    while (isset($token[--$i])) {
        $isAssignment = $token[$i] === '='
            || (is_array($token[$i]) && $token[$i][0] === T_CONCAT_EQUAL);
        if (!$isAssignment || getLHS($token, $i) !== $name) {
            continue;
        }

        // our $name is to the left, so capture the right.
        $RHS = getRHS($token, $i);
        if ($RHS === false) {
            // Empty or unterminated right hand side: nothing to substitute,
            // so keep looking for an earlier definition rather than turning
            // false into a malformed result.
            continue;
        }

        $RHS[] = is_array($token[$i]) ? $token[$i][1] : $token[$i];
        return $RHS;
    }

    // we've run out of tokens, so seems like we can't find where this
    // variable was defined.
    return false;
}

/**
 * Returns the name of the nearest variable token before index $i.
 *
 * @param array $token Token list as produced by token_get_all().
 * @param int   $i     Index of the assignment operator.
 * @return string|false The variable name including "$", or false if none.
 */
function getLHS($token, $i)
{
    // rewind until we hit a variable name or run out of tokens
    while (isset($token[--$i])) {
        if (is_array($token[$i]) && $token[$i][0] == T_VARIABLE) {
            return $token[$i][1];
        }
    }

    // run out of tokens, we can't get a left hand side.
    return false;
}

/**
 * Locates the right hand side of the assignment at index $i.
 *
 * @param array $token Token list as produced by token_get_all().
 * @param int   $i     Index of the assignment operator.
 * @return array|false array(operatorIndex, rhsStart, indexOfSemicolon), or
 *                     false when the RHS is empty or no ';' follows.
 */
function getRHS($token, $i)
{
    // save the cursor at the assignment operator, so if $name is referred to
    // on the RHS, and we have to look for it again, we don't look at this bit.
    $cursor = $i;
    $start  = $i + 1;

    // fast forward until we get to a ';' or run out of tokens.
    $end = $start;
    while (isset($token[$end]) && $token[$end] !== ';') {
        $end++;
    }

    // if we've run out of tokens or the RHS is empty then give up.
    if ($end === $start || !isset($token[$end])) {
        return false;
    }

    return array($cursor, $start, $end);
}

/**
 * Debugging helper: sticks the raw text of tokens [$start, $end) together to
 * see what's going on.
 *
 * @param array $token Token list as produced by token_get_all().
 * @param int   $start Index of the first token.
 * @param int   $end   Index one past the last token.
 * @return string
 */
function renderTokens($token, $start, $end)
{
    $output = '';
    for ($i = $start; $i < $end; $i++) {
        $output .= is_array($token[$i]) ? $token[$i][1] : $token[$i];
    }
    return $output;
}

/**
 * Finds the bracketed argument list following the token at index $i.
 *
 * @param int   $i     Index of the function name token.
 * @param array $token Token list as produced by token_get_all().
 * @return array|false array(indexAfterOpeningBracket, indexOfClosingBracket),
 *                     or false when the name is not followed by a balanced
 *                     "( ... )".
 */
function getMysqlQueryArguments($i, $token)
{
    /* only allowed whitespace before brackets */
    while (isset($token[++$i])) {
        if ($token[$i] === '(') {
            break;
        }
        if (!is_array($token[$i])) {
            return false;
        }
        if ($token[$i][0] !== T_WHITESPACE) {
            return false;
        }
    }

    // ran out of tokens without ever seeing the '('
    if (!isset($token[$i])) {
        return false;
    }

    // we've found the '(' -- now find the matching ')'
    $start = $i;
    $braceCount = 1;
    while (isset($token[++$i])) {
        if ($token[$i] === '(') {
            $braceCount++;
        } elseif ($token[$i] === ')') {
            $braceCount--;
            if ($braceCount === 0) {
                // start and end positions of the parameters
                return array($start + 1, $i);
            }
        }
    }

    // we're left with unmatched braces, something's wrong.
    return false;
}

/**
 * Lists the *.php files directly inside $dir.
 *
 * @param string $dir Directory to scan; the current directory is used when
 *                    this is not a directory.
 * @return array List of paths; empty when nothing matches or glob() fails.
 */
function getPhpFileList($dir)
{
    if (!is_dir($dir)) {
        $dir = '.';
    }

    $files = glob($dir . '/*.php');

    // glob() reports errors with false; callers iterate over the result.
    return $files === false ? array() : $files;
}

/**
 * Reads a file and splits it into PHP tokens.
 *
 * @param string $file Path of the file to tokenise.
 * @return array Token list from token_get_all(); empty if the file cannot
 *               be read.
 */
function tokeniseFile($file)
{
    $source = file_get_contents($file);
    if ($source === false) {
        return array();
    }
    return token_get_all($source);
}

/**
 * Filters a token list down to the tokens accepted by isMysqlQuery().
 *
 * @param array $tokens Token list as produced by token_get_all().
 * @return array Matching tokens, original keys preserved by array_filter().
 */
function findMysqlQueries($tokens)
{
    return array_filter($tokens, 'isMysqlQuery');
}

/**
 * Tells whether a token is the identifier "mysql_query" (case-insensitive).
 *
 * @param mixed $token A single entry of a token_get_all() list.
 * @return bool
 */
function isMysqlQuery($token)
{
    if (!is_array($token)) {
        return false;
    }

    // Function names in calls are T_STRING tokens; T_FUNCTION is the
    // "function" keyword itself and must not count as a match.
    return $token[0] === T_STRING && strtolower($token[1]) === 'mysql_query';
}
?>

--
PHP General Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

--
PHP General Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php