<?php
# Hello Community
# Internationalisation, a topic discussed more than enough and YES, I am looking forward to PHP6.
# But in reality I still have to develop for PHP4 and that's where the dog is buried ^^
# We have a customer here who is running a small site, but still in five different languages.
# Lately he started complaining about some strange site behaviours:
# He has a discussion board where people can post their ideas, comments etc. Nothing special.
# Every post has a maximum length of 2048 characters, which is checked by JavaScript in the browser
# and, after submitting the form, by PHP.
# Our mistake was to use strlen();

# Byte length (strlen) and character length (mb_strlen) of the submitted text.
# Both are echoed further down so the difference is visible on the page.
$cc_strlen = $cc_mb = 0;

# Only measure real strings: a request such as text[]=... delivers an array,
# which strlen()/mb_strlen() cannot handle.
if (isset($_POST['text']) && is_string($_POST['text'])) {
    $cc_strlen = strlen($_POST['text']);
    $cc_mb = mb_strlen($_POST['text'], 'UTF-8'); // new code

    # Enforce the limit on characters, not bytes; strlen() over-counts every
    # multi-byte UTF-8 character and would reject posts that are short enough.
    if ($cc_mb > 2048) {
        /* snip */
    }
    // do something
}
/* snip */
// do something

# strlen() works fine as long as the user only submits single-byte characters,
# but with UTF-8 the whole thing changes ...
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>test</title>
</head>
<body>
<p>You submitted <?php echo $cc_strlen; ?> characters (STRLEN).</p>
<p>You submitted <?php echo $cc_mb; ?> characters (MB_STRLEN).</p>
<p>Characters Left:<span id="remainder">2048</span></p>
<form action="" method="post" onsubmit="return false;" id="post_form">
<textarea id="post_text" name="text" onkeydown="check_length();" onkeyup="check_length();" onchange="check_length();" rows="10" cols="50">œŸŒ‡Ņ</textarea><br />
<input type="submit" value="Submit" id="post_button" onclick="submit_form();" />
</form>
<script type="text/javascript">
<!--
// Maximum post length in characters; must match the limit enforced by PHP.
var MAX_LENGTH = 2048;

var the_form = document.getElementById('post_form');
var textarea = document.getElementById('post_text');
var counter = document.getElementById('remainder');

// Updates the "Characters Left" counter and, when the text is longer than
// MAX_LENGTH, truncates it to exactly MAX_LENGTH and warns the user.
// NOTE: String.length counts UTF-16 code units, so characters outside the
// Basic Multilingual Plane count twice here but once in mb_strlen().
function check_length() {
    var remainder = MAX_LENGTH - textarea.value.length;
    var length_alert = false;

    if (remainder < 0) {
        // Keep the first MAX_LENGTH characters (not MAX_LENGTH - 1).
        textarea.value = textarea.value.substr(0, MAX_LENGTH);
        remainder = 0;
        counter.style.color = 'red';
        length_alert = true;
    }

    if (length_alert) alert('You are already using 2048 characters.');

    // Old Internet Explorer exposes document.all and lacks textContent.
    if (document.all) {
        counter.innerText = remainder;
    } else {
        counter.textContent = remainder;
    }
}

// Click handler of the submit button: re-validates the length, then submits
// the form programmatically (the form's own onsubmit always returns false).
function submit_form() {
    check_length();
    the_form.submit();
    alert ('You submitted ' + textarea.value.length + ' characters');
    return true;
}
// -->
</script>
<?php
# Now as soon as one starts to submit UTF-8 characters, strlen is not working properly any more.
# So we had to work through thousands of lines of code, replacing strlen() with mb_strlen();
# We also found mb_strlen to take about 8 times longer than strlen().

# microtime(true) returns a float in seconds. The default string form
# ("usec sec") cannot be subtracted meaningfully.
$s_t = microtime(true);
mb_strlen('œŸŒ‡Ņ', 'UTF-8');
$e_t = microtime(true);
echo '<p>MB_STRLEN took : '.(($e_t - $s_t)*1000).' milliseconds</p>';

$s_t = microtime(true);
strlen('œŸŒ‡Ņ');
$e_t = microtime(true);
echo '<p>STRLEN took : '.(($e_t - $s_t)*1000).' milliseconds</p>';

# So much for internationalisation.
# Just writing this as a reminder for everyone who is facing similar situations.
?>
</body>
</html>