and tags. Do this first // because there might be '); // Decode HTML entities. $input = html_entity_decode($input, ENT_QUOTES, 'UTF-8'); // Convert entities equivalent to spaces to actual spaces. $input = str_replace(array(' ', ' '), ' ', $input); // This fixes issues such as '

Title

Paragraph

' // being transformed into 'TitleParagraph' with no space.
        $input = str_replace('>', '> ', $input);

        // Strip HTML tags.
        $input = strip_tags($input);

        return parent::parse($input);
    }

    /**
     * Method to normalise the white space of the input.
     *
     * Every run of one or more white space characters is collapsed into a
     * single space. The pattern uses the "u" modifier, so the input is
     * matched as UTF-8.
     *
     * @param   string  $input  The input to process.
     *
     * @return  string  The input with white space runs collapsed.
     *
     * @since   2.5
     */
    protected function process($input)
    {
        // Replace any amount of white space with a single space.
        return preg_replace('#\s+#u', ' ', $input);
    }

    /**
     * Method to remove blocks of text between a start and an end tag.
     * Each block removed is effectively replaced by a single space.
     *
     * A start tag that has no end tag after it is not a block: it and
     * everything following it are returned untouched, and no space is
     * inserted in front of it.
     *
     * Note: The start tag and the end tag must be different.
     * Note: Blocks must not be nested.
     * Note: Tags are matched case-insensitively (stripos). Offsets are byte
     *       offsets, but they are only ever taken from stripos() results and
     *       tag lengths, so multi-byte input is cut on tag boundaries.
     *
     * @param   string  $input     String to be processed.
     * @param   string  $startTag  String representing the start tag.
     * @param   string  $endTag    String representing the end tag.
     *
     * @return  string  The input with all complete blocks removed.
     */
    private function removeBlocks($input, $startTag, $endTag)
    {
        $return         = '';
        $offset         = 0;
        $startTagLength = strlen($startTag);
        $endTagLength   = strlen($endTag);

        // Find the first start tag.
        $start = stripos($input, $startTag);

        // If no start tags were found, return the string unchanged.
        if ($start === false) {
            return $input;
        }

        // Look for all blocks defined by the start and end tags.
        while ($start !== false) {
            // Look for an end tag corresponding to the start tag.
            $end = stripos($input, $endTag, $start + $startTagLength);

            // No corresponding end tag: nothing is removed. $offset still
            // points just past the previous block, so the tail appended after
            // the loop contains the unterminated tag exactly as it was given.
            if ($end === false) {
                break;
            }

            // Keep the text in front of the block and put a single space in
            // place of the block itself.
            $return .= substr($input, $offset, $start - $offset) . ' ';

            // Continue scanning right after the end tag.
            $offset = $end + $endTagLength;

            // Look for the next start tag and loop.
            $start = stripos($input, $startTag, $offset);
        }

        // Add in the final substring after the last removed block.
        $return .= substr($input, $offset);

        return $return;
    }
}