From 2928f162f9a35f5703d9c68bd7de48326e0fc72d Mon Sep 17 00:00:00 2001 From: JediKev <kevin@enhancesoft.com> Date: Tue, 27 Jun 2017 16:15:13 -0500 Subject: [PATCH] pages: Translate Special Characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This addresses issue #3842 where special characters in a Page name cause a 403 or 404 error. This adds a method to convert the special characters to HTML entities and then removes the entity suffixes, leaving only un-accented characters behind. (e.g. 'ã' => '&atilde;' => 'a') --- include/class.format.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/class.format.php b/include/class.format.php index 6058142d8..f9f2ab574 100644 --- a/include/class.format.php +++ b/include/class.format.php @@ -737,6 +737,13 @@ class Format { // Thanks, http://stackoverflow.com/a/2955878/1025836 /* static */ function slugify($text) { + // convert special characters to entities + $text = htmlentities($text, ENT_NOQUOTES, 'UTF-8'); + + // removes entity suffixes, leaving only un-accented characters + $text = preg_replace('~&([A-za-z])(?:acute|cedil|circ|grave|orn|ring|slash|th|tilde|uml);~', '$1', $text); + $text = preg_replace('~&([A-za-z]{2})(?:lig);~', '$1', $text); + // replace non letter or digits by - $text = preg_replace('~[^\p{L}\p{N}]+~u', '-', $text); -- GitLab