From 2928f162f9a35f5703d9c68bd7de48326e0fc72d Mon Sep 17 00:00:00 2001
From: JediKev <kevin@enhancesoft.com>
Date: Tue, 27 Jun 2017 16:15:13 -0500
Subject: [PATCH] pages: Translate Special Characters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This addresses issue #3842, where special characters in a Page name cause a
403 or 404 error. It adds a step to slugify() that converts the special
characters to HTML entities and then strips the entity suffixes, leaving
only unaccented characters behind (e.g. 'ã' => '&atilde;' => 'a').
---
 include/class.format.php | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/class.format.php b/include/class.format.php
index 6058142d8..f9f2ab574 100644
--- a/include/class.format.php
+++ b/include/class.format.php
@@ -737,6 +737,13 @@ class Format {
     // Thanks, http://stackoverflow.com/a/2955878/1025836
     /* static */
     function slugify($text) {
+        // convert special characters to entities
+        $text = htmlentities($text, ENT_NOQUOTES, 'UTF-8');
+
+        // removes entity suffixes, leaving only un-accented characters
+        $text = preg_replace('~&([A-za-z])(?:acute|cedil|circ|grave|orn|ring|slash|th|tilde|uml);~', '$1', $text);
+        $text = preg_replace('~&([A-za-z]{2})(?:lig);~', '$1', $text);
+
         // replace non letter or digits by -
         $text = preg_replace('~[^\p{L}\p{N}]+~u', '-', $text);
 
-- 
GitLab