RE: Re: [new version] Re: [a proactive example of learning by hacking] Re: [PHP] Getting queries from files FYI

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[top-snip]
nice! I'll give it a look this morning
[/snip]


This is another rather hackish attempt at using the tokeniser.

It turns this:

<?php
// Sample input: the SQL text is spread over several variables and only
// comes together in the argument passed to mysql_query() below.
$emu = "my_column {$banana}";
$wallaby = 'my_table';
$kookaburra = 'SELECT * FROM';
$kookaburra .= $wallaby;
$koala = 'ASC';
$taipan =  ' ORDER BY' . $emu;
$dropBear = 'group by something';

// The call whose argument expression gets resolved.
mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala " .
strtoupper($dropBear));
?>

into this:

SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana ASC strtoupper( group by something)

Which isn't perfect by a long shot, but I'm away home now so it'll have
to do.

I'd be interested to know what it makes of your queries.

 -robin

<?php

// Looks at all $dir/*.php files and prints, for each one, a best-effort
// reconstruction of the SQL passed to every mysql_query() call.
$dir = '/path/to/php/files';

foreach (getPhpFileList($dir) as $file) {
  print "===== $file =====\n";
  $token = tokeniseFile($file);

  // First find all calls to mysql_query(); each entry is an
  // array(start, end) range of token indexes covering the arguments.
  $mysqlCalls = getMysqlQueryCalls($token);
  // Nothing found (or no usable list returned): move on to the next file.
  if (empty($mysqlCalls)) continue;

  foreach ($mysqlCalls as $range) {
    // The start of the argument list doubles as the cursor: variable
    // definitions are searched for backwards from that point.
    $sql = resolveExpression($token, $range[0], $range[0], $range[1]);
    // Tidy it a little: collapse whitespace runs and trim the ends.
    echo trim(preg_replace('/\s+/', ' ', $sql)), "\n";
  }
}


/**
 * Find every call to mysql_query() in a token stream.
 *
 * @param array $token Token list in the format produced by token_get_all().
 * @return array One entry per call, each being the array(start, end) token
 *               index range returned by getMysqlQueryArguments(). Empty
 *               when the stream contains no usable call.
 */
function getMysqlQueryCalls($token) {
  $callList = array();
  for ($i = 0; isset($token[$i]); $i++) {
    // Not interested in atomic (single character) tokens.
    if (!is_array($token[$i])) continue;
    // Function names are case-insensitive in PHP, so compare lower-cased.
    if (strtolower($token[$i][1]) !== 'mysql_query') continue;
    $args = getMysqlQueryArguments($i, $token);
    // Collect into the list that is actually returned, so the result is
    // always an array even when nothing matched.
    if ($args !== false) $callList[] = $args;
  }
  return $callList;
}

/**
 * Debug helper: copy a slice of the token list, replacing each numeric
 * token id with its symbolic name as given by token_name().
 *
 * @param array    $token Token list in the format produced by token_get_all().
 * @param int      $start Index of the first token to include.
 * @param int|null $end   Index one past the last token; null means "to the
 *                        end of the list".
 * @return array The selected tokens; atomic (string) tokens are unchanged.
 */
function nameTokens($token, $start = 0, $end = null)
{
  $length = ($end === null ? count($token) : $end) - $start;
  $named = array();
  foreach (array_slice($token, $start, $length) as $index => $entry) {
    if (is_array($entry)) {
      $entry[0] = token_name($entry[0]);
    }
    $named[$index] = $entry;
  }
  return $named;
}

/**
 * Work out the text that the token at index $i contributes to a query.
 *
 * @param array $token  Token list in the format produced by token_get_all().
 * @param int   $cursor Index before which variable definitions are searched.
 * @param int   $i      Index of the token to resolve.
 * @return string|null  The resolved text; null for the curly-open tokens
 *                      used in string interpolation, which add nothing.
 */
function resolveToken($token, $cursor, $i) {
  $current = $token[$i];

  // Atomic (single character) tokens: brackets are kept, the rest dropped.
  if (!is_array($current)) {
    if ($current == '(' || $current == ')') return $current;
    return '';
  }

  $type = $current[0];
  $text = $current[1];

  // Interpolation delimiters contribute no text of their own.
  if ($type == T_DOLLAR_OPEN_CURLY_BRACES || $type == T_CURLY_OPEN) return null;

  // Identifiers, whitespace and string fragments are passed through verbatim.
  if ($type == T_STRING || $type == T_WHITESPACE || $type == T_ENCAPSED_AND_WHITESPACE) {
    return $text;
  }

  // A quoted literal: evaluate it to get the string's value.
  // NOTE(review): eval() executes text taken from the scanned file. The
  // token is presumably always a plain quoted literal, but only run this
  // tool on source you trust.
  if ($type == T_CONSTANT_ENCAPSED_STRING) return eval( "return {$text};");

  // Anything that is not a variable is shown as (TOKEN_NAME:text).
  if ($type != T_VARIABLE && $type != T_STRING_VARNAME) {
    return '('.token_name($type) . ':' . $text . ')';
  }

  $def = findLastDefinition($token, $cursor, $i);
  if ($def === false) {
    // Can't find anything to replace the variable with; presume it is
    // defined elsewhere and emit it in the normalised $name form.
    return preg_replace('/^\$*/', '$', $text);
  }

  // For '.=' the earlier value of the same variable (searched for before
  // this assignment) comes first, followed by the appended expression.
  $earlier = ($def[3] == '.=') ? resolveToken($token, $def[0], $i) : '';
  return $earlier . resolveExpression($token, $def[0], $def[1], $def[2]);
}

/**
 * Resolve a run of tokens into a single string.
 *
 * @param array $token  Token list in the format produced by token_get_all().
 * @param int   $cursor Index before which variable definitions are searched;
 *                      passed through to resolveToken().
 * @param int   $start  Index of the first token of the expression.
 * @param int   $end    Index one past the last token of the expression.
 * @return string Concatenation of every token's resolved text; '' when the
 *                range is empty.
 */
function ResolveExpression($token, $cursor, $start, $end) {
  $output = '';
  // Just try to resolve all the tokens in the expression, concatenate
  // them and hand the result back.
  for ($i = $start; $i < $end; $i++) {
    $output .= resolveToken($token, $cursor, $i);
  }
  return $output;
}

/**
 * Find the most recent assignment to a variable before a given position.
 *
 * @param array $token Token list in the format produced by token_get_all().
 * @param int   $i     Cursor: the search runs backwards from just before
 *                     this index.
 * @param int   $id    Index of the variable token being looked up.
 * @return array|false array(cursor, start, end, operator), where the first
 *                     three values come from getRHS() and operator is '='
 *                     or '.='; false when no definition is found.
 */
function findLastDefinition($token, $i, $id) {
  // Make sure all variables are in the form $name, as ${name} ones are
  // just 'name' by the time they end up here.
  $name = preg_replace('/^\$*/', '$', $token[$id][1]);

  // Rewind until we hit an assignment or run out of tokens.
  while (isset($token[--$i])) {
    $tok = $token[$i];
    // Plain '=' is an atomic token; '.=' arrives as a T_CONCAT_EQUAL array.
    $isAssignment = is_array($tok) ? $tok[0] === T_CONCAT_EQUAL : $tok === '=';
    if (!$isAssignment) continue;

    // Only interested when our $name is on the left of the assignment.
    if (getLHS($token, $i) !== $name) continue;

    $RHS = getRHS($token, $i);
    // getRHS() gives up with false on an empty or unterminated right hand
    // side; keep looking further back instead of treating false as an array.
    if ($RHS === false) continue;

    $RHS[] = is_array($tok) ? $tok[1] : $tok;
    return $RHS;
  }
  // We've run out of tokens, so it seems we can't find where this
  // variable was defined.
  return false;
}


/**
 * Find the variable on the left hand side of an assignment.
 *
 * @param array $token Token list in the format produced by token_get_all().
 * @param int   $i     Index of the assignment operator token.
 * @return string|false Text of the nearest preceding T_VARIABLE token, or
 *                      false when the start of the list is reached first.
 */
function getLHS($token, $i)
{
  // Step backwards from just before the operator.
  for ($pos = $i - 1; isset($token[$pos]); $pos--) {
    $candidate = $token[$pos];
    if (!is_array($candidate)) continue;
    if ($candidate[0] == T_VARIABLE) return $candidate[1];
  }
  // Ran out of tokens without seeing a variable.
  return false;
}

/**
 * Find the token range of the right hand side of an assignment.
 *
 * @param array $token Token list in the format produced by token_get_all().
 * @param int   $i     Index of the assignment operator token.
 * @return array|false array(cursor, start, end): cursor is the operator's
 *                     index, start the first token after it and end the
 *                     index of the terminating ';'. False when the right
 *                     hand side is empty or no ';' follows.
 */
function getRHS($token, $i)
{
  // Save the cursor at the assignment operator, so if the variable is
  // referred to on the RHS and we have to look for it again, we don't
  // look at this bit.
  $cursor = $i;

  // The RHS starts right after the operator; an immediate ';' or the end
  // of the token list means there is nothing to capture.
  $start = $i + 1;
  if (!isset($token[$start]) || $token[$start] === ';') return false;

  // Fast forward until we get to a ';' or run out of tokens.
  $end = $start;
  while (isset($token[$end]) && $token[$end] !== ';') {
    $end++;
  }

  // If we've run out of tokens then give up.
  if (!isset($token[$end])) return false;
  return array($cursor, $start, $end);
}

/**
 * Debug helper: glue the raw text of a run of tokens back together.
 *
 * @param array $token Token list in the format produced by token_get_all().
 * @param int   $start Index of the first token to render.
 * @param int   $end   Index one past the last token to render.
 * @return string The source text of the selected tokens.
 */
function renderTokens($token, $start, $end)
{
  $pieces = array();
  $pos = $start;
  while ($pos < $end) {
    $current = $token[$pos++];
    // Array tokens keep their text in slot 1; atomic tokens are the text.
    $pieces[] = is_array($current) ? $current[1] : $current;
  }
  return implode('', $pieces);
}


/**
 * Locate the argument list of the call whose name token sits at index $i.
 *
 * @param int   $i     Index of the function-name token.
 * @param array $token Token list in the format produced by token_get_all().
 * @return array|false array(start, end): start is the first token after
 *                     the opening '(' and end is the index of the matching
 *                     ')'. False when something other than whitespace
 *                     precedes the '(' or the brackets never balance.
 */
function getMysqlQueryArguments($i, $token) {
  // Only whitespace is allowed between the name and its opening bracket.
  $open = $i + 1;
  while (isset($token[$open]) && $token[$open] !== '(') {
    $gap = $token[$open];
    if (!is_array($gap) || $gap[0] !== T_WHITESPACE) return false;
    $open++;
  }

  // $open now sits on the '(' (or past the end of the list). Walk forward
  // tracking nesting depth until the matching ')' brings it back to zero.
  $depth = 1;
  for ($pos = $open + 1; isset($token[$pos]) && $depth; $pos++) {
    if ($token[$pos] === '(') {
      $depth++;
    } elseif ($token[$pos] === ')') {
      $depth--;
    }
  }

  // Any bracket left unmatched means something is wrong.
  if ($depth != 0) return false;

  // $pos has stepped one past the closing ')', hence the -1.
  return array($open + 1, $pos - 1);
}

/**
 * List the *.php files directly inside a directory.
 *
 * @param string $dir Directory to search; when it is not a directory the
 *                    current directory ('.') is searched instead.
 * @return array|false Whatever glob() returns for "<dir>/*.php".
 */
function getPhpFileList($dir)
{
  $searchDir = is_dir($dir) ? $dir : '.';
  return glob($searchDir . '/*.php');
}

/**
 * Read a file and split its contents into PHP tokens.
 *
 * @param string $file Path of the file to read.
 * @return array Token list as returned by token_get_all().
 */
function tokeniseFile($file)
{
  $source = file_get_contents($file);
  return token_get_all($source);
}

/**
 * Keep only the tokens that isMysqlQuery() accepts.
 *
 * @param array $tokens Token list in the format produced by token_get_all().
 * @return array The accepted tokens, still under their original keys.
 */
function findMysqlQueries($tokens)
{
  $matches = array();
  foreach ($tokens as $key => $tok) {
    if (isMysqlQuery($tok)) {
      $matches[$key] = $tok;
    }
  }
  return $matches;
}

/**
 * Tell whether a token is the function name mysql_query.
 *
 * @param array|string $token One entry of a token_get_all() list.
 * @return bool|int 0 for atomic (string) tokens, kept as-is for existing
 *                  callers; otherwise true only for a T_STRING token whose
 *                  text is mysql_query in any letter case.
 */
function isMysqlQuery($token) {
  if (!is_array($token)) return 0;
  // T_FUNCTION is the "function" keyword, not a call, so testing for it
  // matched every function declaration. A called function's name arrives
  // as a T_STRING token, and both conditions must hold.
  return ($token[0] === T_STRING) && (strtolower($token[1]) === 'mysql_query');
}
?>

-- 
PHP General Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

-- 
PHP General Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php



[Index of Archives]     [PHP Home]     [Apache Users]     [PHP on Windows]     [Kernel Newbies]     [PHP Install]     [PHP Classes]     [Pear]     [Postgresql]     [Postgresql PHP]     [PHP on Windows]     [PHP Database Programming]     [PHP SOAP]

  Powered by Linux