It turns this:
<?php
$emu = "my_column {$banana}";
$wallaby = 'my_table';
$kookaburra = 'SELECT * FROM';
$kookaburra .= $wallaby;
$koala = 'ASC';
$taipan = ' ORDER BY' . $emu;
$dropBear = 'group by something';
mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala " .
strtoupper($dropBear));
?>
into this:
SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana
ASC strtoupper( group by something)
That isn't perfect by a long shot (note that strtoupper() is printed rather than applied), but I'm heading home now, so it'll have to do.
I'd be interested to know what it makes of your queries.
-robin
<?php
// Driver: for every file returned by getPhpFileList($dir), print a banner and
// then one line per mysql_query() call with its argument expression resolved.
$dir = '/path/to/php/files';
foreach (getPhpFileList($dir) as $file) {
    echo "===== $file =====\n";
    $token = tokeniseFile($file);

    // Each entry of $mysqlCalls is a token range covering one call's arguments.
    $mysqlCalls = getMysqlQueryCalls($token);
    foreach ($mysqlCalls as $range) {
        // The start of the range doubles as the cursor: variable definitions
        // are searched for backwards from the call itself.
        $sql = resolveExpression($token, $range[0], $range[0], $range[1]);

        // Collapse every whitespace run to one space so a query fits on a line.
        echo trim(preg_replace('/\s+/', ' ', $sql)) . "\n";
    }
}
// Collect the argument token range of every mysql_query() call in a token list.
//
// $token is a token list in the format produced by token_get_all().
// Returns a list of array($start, $end) pairs as produced by
// getMysqlQueryArguments(); the list is empty when no call is found.
function getMysqlQueryCalls($token) {
    $callList = array();
    for ($i = 0; isset($token[$i]); $i++) {
        if (!is_array($token[$i])) continue; // not interested in atomic tokens
        if (strtolower($token[$i][1]) !== 'mysql_query') continue;
        $args = getMysqlQueryArguments($i, $token);
        // Append to the list initialised above. The previous code appended to
        // (and returned) a different, never-initialised variable, so a file
        // without any mysql_query() call returned null instead of array().
        if ($args !== false) $callList[] = $args;
    }
    return $callList;
}
// Debugging aid: copy a slice of the token list, replacing each numeric token
// id with its symbolic name (via token_name()). Atomic string tokens are
// copied unchanged.
//
// $start is the first index of the slice, $end the index one past its last
// element; a null $end means "up to the end of the list".
function nameTokens($token, $start = 0, $end = null)
{
    if ($end === null) {
        $end = count($token);
    }

    $named = array();
    foreach (array_slice($token, $start, $end - $start) as $key => $tok) {
        if (is_array($tok)) {
            $tok[0] = token_name($tok[0]);
        }
        $named[$key] = $tok;
    }

    return $named;
}
// Work out the text that the single token at index $i contributes to a query.
//
// $token  : token list in the format produced by token_get_all()
// $cursor : position handed to findLastDefinition(), which searches backwards
//           from there for an assignment to the variable
// $i      : index of the token to resolve
//
// Returns a string, or null for the curly-brace openers of interpolated
// variables (which contribute nothing when concatenated).
function resolveToken($token, $cursor, $i) {
    $current = $token[$i];

    // Atomic tokens: keep parentheses, drop every other character.
    if (!is_array($current)) {
        if ($current == '(' || $current == ')') {
            return $current;
        }
        return '';
    }

    switch ($current[0]) {
        case T_STRING:
        case T_WHITESPACE:
        case T_ENCAPSED_AND_WHITESPACE:
            // Plain text: use it verbatim.
            return $current[1];

        case T_CONSTANT_ENCAPSED_STRING:
            // The token text still carries its quotes and escapes; evaluating
            // it as a PHP expression yields the string's actual value.
            // NOTE(review): this eval()s text taken from the scanned file --
            // confirm that is acceptable before running on untrusted sources.
            return eval("return {$current[1]};");

        case T_VARIABLE:
        case T_STRING_VARNAME:
            $def = findLastDefinition($token, $cursor, $i);
            if ($def === false) {
                // No assignment found: presume the variable is defined
                // elsewhere and print it as "$name".
                return preg_replace('/^\$*/', '$', $current[1]);
            }
            // For '.=' the value is whatever the variable held before that
            // statement, followed by the appended expression.
            $earlier = ($def[3] == '.=') ? resolveToken($token, $def[0], $i) : '';
            return $earlier . resolveExpression($token, $def[0], $def[1], $def[2]);

        case T_DOLLAR_OPEN_CURLY_BRACES:
        case T_CURLY_OPEN:
            return null;

        default:
            // Anything unhandled is shown as "(TOKEN_NAME:text)".
            return '(' . token_name($current[0]) . ':' . $current[1] . ')';
    }
}
// Resolve a run of tokens into the text they produce.
//
// $token  : token list in the format produced by token_get_all()
// $cursor : passed unchanged to resolveToken() for every token
// $start  : index of the first token to resolve
// $end    : index one past the last token to resolve
//
// Returns the concatenated results; '' when the range is empty.
function ResolveExpression($token, $cursor, $start, $end) {
    $output = '';
    // Just try to resolve all the tokens in the expression, concatenate them
    // and throw them back. (This comment used to be hard-wrapped so that its
    // second line was parsed as code, which was a syntax error.)
    for ($i = $start; $i < $end; $i++) {
        $output .= resolveToken($token, $cursor, $i);
    }
    return $output;
}
// Find the closest assignment ('=' or '.=') before position $i whose left-hand
// side is the variable named by token $id.
//
// $token : token list in the format produced by token_get_all()
// $i     : position to search backwards from (exclusive)
// $id    : index of the variable token being looked up
//
// Returns array($cursor, $start, $end, $operator), where the first three
// entries come from getRHS() and $operator is '=' or '.='. Returns false when
// no usable assignment is found.
function findLastDefinition($token, $i, $id) {
    // make sure all variables are in the form $name as ${name} ones are
    // just 'name' by the time they end up here.
    $name = preg_replace('/^\$*/', '$', $token[$id][1]);

    // rewind until we hit an assignment or run out of tokens
    while (isset($token[--$i])) {
        $isAssign = ($token[$i] === '=');
        $isConcatAssign = is_array($token[$i]) && $token[$i][0] == T_CONCAT_EQUAL;
        if (!$isAssign && !$isConcatAssign) continue;

        // if we catch an assignment and our $name is to the left then
        // capture the right.
        if (getLHS($token, $i) !== $name) continue;

        $RHS = getRHS($token, $i);
        // getRHS() gives up with false on an empty or unterminated right-hand
        // side. Appending to false would silently build a one-element array
        // whose missing offsets the caller then reads, so report "not found".
        if ($RHS === false) return false;

        $RHS[] = $isConcatAssign ? $token[$i][1] : $token[$i];
        return $RHS;
    }

    // we've run out of tokens, so seems like we can't find where this
    // variable was defined.
    return false;
}
// Return the text of the nearest T_VARIABLE token that precedes position $i,
// or false when there is none before the start of the token list.
function getLHS($token, $i)
{
    for ($pos = $i - 1; isset($token[$pos]); $pos--) {
        $candidate = $token[$pos];
        if (is_array($candidate) && $candidate[0] == T_VARIABLE) {
            return $candidate[1];
        }
    }

    // Ran out of tokens: there is no left-hand side to report.
    return false;
}
// Locate the right-hand side of the assignment operator at position $i.
//
// Returns array($cursor, $start, $end): $cursor is the operator's own
// position (so a later lookup of a variable used on the right-hand side does
// not see this assignment again), $start is the first token after the
// operator and $end is the position of the terminating ';'.
// Returns false when the right-hand side is empty or no ';' is found.
function getRHS($token, $i)
{
    $cursor = $i;
    $first = $i + 1;

    // Scan forward to the ';' or to the end of the token list.
    $stop = $first;
    while (isset($token[$stop]) && $token[$stop] != ';') {
        $stop++;
    }

    $isEmpty = ($stop === $first);
    $ranOut = !isset($token[$stop]);
    if ($isEmpty || $ranOut) {
        return false;
    }

    return array($cursor, $first, $stop);
}
// Debugging aid: glue the raw text of tokens $start .. $end-1 back together
// to see what a token range looks like in source form.
function renderTokens($token, $start, $end)
{
    $pieces = array();
    for ($pos = $start; $pos < $end; ++$pos) {
        $tok = $token[$pos];
        // Array tokens keep their text at offset 1; atomic tokens are the text.
        $pieces[] = is_array($tok) ? $tok[1] : $tok;
    }

    return implode('', $pieces);
}
// Given the position $i of a function-name token, find its argument list.
//
// Returns array($first, $close), where $first is the index of the token right
// after the opening '(' and $close is the index of the matching ')'.
// Returns false when anything other than whitespace sits between the name and
// the '(' or when the parentheses never balance.
function getMysqlQueryArguments($i, $token) {
    // Only whitespace is allowed between the name and its opening bracket.
    $open = $i + 1;
    while (isset($token[$open]) && $token[$open] !== '(') {
        $tok = $token[$open];
        if (!is_array($tok) || $tok[0] !== T_WHITESPACE) {
            return false;
        }
        $open++;
    }

    // $open now sits on the '(' (or past the end of the list, in which case
    // the depth below never returns to zero). Walk to the matching ')'.
    $depth = 1;
    $pos = $open + 1;
    while (isset($token[$pos]) && $depth) {
        if ($token[$pos] === '(') {
            $depth++;
        } elseif ($token[$pos] === ')') {
            $depth--;
        }
        $pos++;
    }

    // Unmatched brackets left over: something is wrong.
    if ($depth != 0) {
        return false;
    }

    // $pos has already moved one past the closing ')'.
    return array($open + 1, $pos - 1);
}
// List the *.php files located directly in $dir (as matched by glob()).
// When $dir is not a directory the current directory is listed instead.
function getPhpFileList($dir)
{
    $base = is_dir($dir) ? $dir : '.';

    return glob($base . '/*.php');
}
// Read the file at $file and split its contents into PHP tokens with
// token_get_all().
function tokeniseFile($file)
{
    $source = file_get_contents($file);

    return token_get_all($source);
}
// Keep only the tokens for which isMysqlQuery() returns a true value.
// Array keys of the surviving tokens are preserved.
function findMysqlQueries($tokens)
{
    $matches = array();
    foreach ($tokens as $key => $tok) {
        if (isMysqlQuery($tok)) {
            $matches[$key] = $tok;
        }
    }

    return $matches;
}
// Tell whether $token is the identifier "mysql_query" (in any letter case).
//
// $token is one entry of a token list in the format produced by
// token_get_all(). Atomic (string) tokens never match.
//
// The previous test was "T_FUNCTION OR name matches", which also accepted
// every `function` keyword and any non-identifier token whose text happened
// to be "mysql_query". The check is now "T_STRING AND name matches", and the
// result is always a boolean (non-array tokens used to yield int 0).
function isMysqlQuery($token) {
    if (!is_array($token)) return false;
    return ($token[0] == T_STRING) && (strtolower($token[1]) == 'mysql_query');
}
?>