<?php
/*
Plugin Name:Similar Posts
Plugin URI: http://rmarsh.com/plugins/similar-posts/
Description: Displays a list of posts similar to the current one by comparing the way they use words. <a href="http://rmarsh.com/plugins/post-options/">Instructions and help online.</a>
Version: 2.3.6
Author: Rob Marsh, SJ
Author URI: http://rmarsh.com/
*/

/*
Copyright 2007  Rob Marsh, SJ  (http://rmarsh.com)

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details: http://www.gnu.org/licenses/gpl.txt
*/


/*
	Template Tag: Displays the posts most similar to the current post.
		e.g.: <?php similar_posts(); ?>
	Full help and instructions at http://rmarsh.com/plugins/post-options/

	$args is an optional 'key1=val1&key2=val2' string overriding the stored options.
	The rendered list is echoed, nothing is returned.
*/
function similar_posts($args = '') {
	global $post;
	$result = false;
	$cache = null;
	$key = '';
	// Always defined, so the final echo is safe even when the cache hands back a
	// value that is falsy but not false (e.g. an empty string): in that case
	// neither the "hit" nor the "miss" branch below assigns a timing comment.
	$cache_time = '';
	// if the Plugin Output Cache plugin is installed we can cheat...
	$use_cache = defined('POC_CACHE');
	if ($use_cache) {
		$post_id = isset($post->ID) ? $post->ID : '';
		$key = 's_p' . $post_id . $args;
		$cache = new POC_Cache();
		$cache->timer_start();
		$result = $cache->fetch($key);
		if ($result) {
			$cache_time = sprintf('<!-- POC cache hit: %.3f ms -->', 1000 * $cache->timer_stop());
		}
	}
	// ... otherwise we do it the hard way
	if (false === $result) {
		$sp = new SimilarPosts($args, '<li>{link}</li>');
		$sp->workhorse();
		$result = $sp->output();
		unset($sp);
		if ($use_cache) {
			$cache->store($key, $result);
			$cache_time = sprintf('<!-- POC cache miss: %.3f ms -->', 1000 * $cache->timer_stop());
		}
	}
	echo $result;
	echo $cache_time;
}

/*
	The plugin class does all the work: the constructor merges the template-tag
	arguments with the stored options and parses the output template, workhorse()
	builds and runs the full-text query, and output() renders the rows.
*/
class SimilarPosts {
	var $p = array(); // the various parameters (arguments merged over stored options)
	var $results; // the query results
	var $running_time; // timestamp taken at construction, reported in the trailing HTML comment
	var $count_terms; // number of search terms, used to scale {score}

	// The constructor processes the various parameters.
	//	$args           'key1=val1&key2=val2' string of option overrides (may be empty)
	//	$default_output template used when no output_template is configured
	// Declared before the PHP 4 style constructor below so that PHP 5 does not
	// report a redefined constructor.
	function __construct($args, $default_output) {
		$this->running_time = $this->getmicrotime();
		$this->p = array();
		// First we process any arguments to see if any defaults have been overridden
		// 	$args is of the form 'key1=val1&key2=val2'
		//	The code copes with null values, e.g., 'key1=&key2=val2'
		//	and arguments with embedded '=', e.g. 'output_template=<li class="stuff">{...}</li>'
		//	because only the FIRST '=' separates the key from the value.
		if ($args) {
			foreach (explode('&', $args) as $arg) {
				$i = strpos($arg, '=');
				// no '=' at all, or an empty key, is not a valid 'key=value' pair
				if ($i) {
					// the cast keeps 'key=' an empty string on PHP versions where substr() returns false
					$this->p[substr($arg, 0, $i)] = (string) substr($arg, $i + 1);
				}
			}
		}

		// This block retrieves the options stored in the database and uses them unless a value
		// has been overridden by specifying its value in the arguments above.
		$options = get_option('similar-posts');
		// get_option() returns false when the option has never been saved
		if (!is_array($options)) $options = array();

		// options which are stored slashed
		$slashed = array(
			'limit', 'skip', 'trim_before', 'none_text', 'tag_str',
			'excluded_cats', 'included_cats', 'excluded_authors', 'included_authors',
			'excluded_posts', 'prefix', 'suffix', 'output_template', 'num_terms');
		foreach ($slashed as $key) {
			if (!isset($this->p[$key])) {
				$this->p[$key] = isset($options[$key]) ? stripslashes($options[$key]) : '';
			}
		}
		// options which are used exactly as stored
		$verbatim = array(
			'omit_current_post', 'show_private', 'show_pages', 'stripcodes',
			'match_cat', 'match_tags', 'bias_title', 'utf8');
		foreach ($verbatim as $key) {
			if (!isset($this->p[$key])) {
				$this->p[$key] = isset($options[$key]) ? $options[$key] : null;
			}
		}
		// a blank value has to be translated to the internal code '9999'
		foreach (array('excluded_cats', 'included_cats', 'excluded_authors', 'included_authors') as $key) {
			if (trim($this->p[$key]) == '') $this->p[$key] = '9999';
		}
		// an empty output_template makes no sense so we fall back to the default
		if ($this->p['output_template'] == '') $this->p['output_template'] = $default_output;

		// Now we process the output_template to find the embedded tags which are to be replaced
		// with values taken from the database.

		// These are the tags which are recognised by the output template processor and
		// the names of the database fields needed to evaluate each one.
		$valid_tags = array(
			'score' => array('ID'),
			'title' => array('post_title'),
			'url' => array('ID'),
			'author' => array('post_author'),
			'date' => array('post_date'),
			'dateedited' => array('post_modified'),
			'excerpt' => array('post_excerpt', 'post_content'),
			'snippet' => array('post_content'),
			'snippetword' => array('post_content'),
			'fullpost' => array('post_content'),
			'categorylinks' => array('ID'),
			'catnames' => array('ID'),
			'commentcount' => array('comment_count'),
			'custom' => array('ID'),
			'tags' => array('ID'),
			'taglinks' => array('ID'),
			'link' => array('post_title', 'ID', 'post_date'));

		// this will contain the needed fields; ID is always selected
		$fields = array('ID');
		// Find all the tags in the template and store them in the $matches array
		preg_match_all('/{(.+?)}/', $this->p['output_template'], $matches);

		// A tag is of the form {tag:ext}, where the tag part will be evaluated and replaced
		// and the optional ext part provides extra data.
		$fulltags = array(); // the tags exactly as written, ready for strtr()
		$tags = array(); // the bare, validated tag names
		$exts = array(); // the extra data, '' when absent
		foreach ($matches[0] as $match) {
			$pos = strpos($match, ':');
			if ($pos) {
				$ext = rtrim(substr($match, $pos+1), '}');
				$tag = ltrim(substr($match, 0, $pos), '{');
			} else {
				$ext = '';
				$tag = trim($match, '{}');
			}
			// anything that is not a known tag is left in the template untouched
			if (isset($valid_tags[$tag])) {
				$fulltags[] = $match;
				$tags[] = $tag;
				$exts[] = $ext;
				foreach ($valid_tags[$tag] as $fieldname) {
					$fields[] = $fieldname;
				}
			}
		}

		// Store the tag information in the parameter array
		$this->p['fulltags'] = $fulltags;
		$this->p['tags'] = $tags;
		$this->p['exts'] = $exts;

		// remove any duplicate fields
		$this->p['fields'] = array_unique($fields);
	}

	// PHP 4 style constructor, kept so that PHP 4 and any code calling it by name
	// keep working; PHP 8 no longer treats it as a constructor, hence __construct() above.
	function SimilarPosts($args, $default_output) {
		$this->__construct($args, $default_output);
	}

	// Reduces a comma separated list to its non-negative integer members, e.g.
	// ' 3, x,7 ,' gives '3,7'. Returns '' when nothing usable is left. Used on every
	// ID list that ends up inside an SQL IN ( ... ) clause.
	function clean_id_list($list) {
		$ids = array();
		foreach (explode(',', (string) $list) as $piece) {
			$piece = trim($piece);
			if (preg_match('/^[0-9]+$/', $piece)) {
				$ids[] = (int) $piece;
			}
		}
		return implode(',', $ids);
	}

	// Formats an array of IDs as a quoted SQL list: '1', '2', '3'
	function quoted_list($ids) {
		return "'" . implode("', '", array_map('intval', $ids)) . "'";
	}

	// Returns the IDs of the objects filed under any of the given categories
	// (a comma separated list) or any of their child categories.
	function posts_in_categories($cat_list) {
		$cats = explode(',', $cat_list);
		$all = $cats;
		foreach ($cats as $cat) {
			$children = get_term_children($cat, 'category');
			// get_term_children() can hand back a WP_Error instead of an array
			if (is_array($children)) $all = array_merge($all, $children);
		}
		$ids = get_objects_in_term(array_unique($all), 'category');
		return is_array($ids) ? $ids : array();
	}

	// Returns the IDs of the objects carrying the given terms of $taxonomy:
	// any one of them, or -- when $match_all is true -- every one of them.
	function posts_with_terms($term_ids, $taxonomy, $match_all) {
		$term_ids = (array) $term_ids;
		if (count($term_ids) == 0) return array();
		if (!$match_all) {
			$ids = get_objects_in_term($term_ids, $taxonomy);
			return is_array($ids) ? array_unique($ids) : array();
		}
		$ids = array();
		$first = true;
		foreach ($term_ids as $term_id) {
			$tagged = get_objects_in_term($term_id, $taxonomy);
			if (!is_array($tagged)) $tagged = array();
			// An explicit flag rather than count($ids): once the intersection is
			// empty it has to stay empty instead of restarting from the next term.
			$ids = $first ? $tagged : array_intersect($ids, $tagged);
			$first = false;
		}
		return array_unique($ids);
	}

	// This method sets up and executes the database query that does all the work.
	// The rows end up in $this->results and the number of search terms in $this->count_terms.
	function workhorse() {
		global $wpdb, $wp_version, $post;
		// without a current post there is nothing to compare against
		if (!is_object($post)) {
			$this->results = array();
			return;
		}
		// version_compare() rather than comparing the version string with a float
		$pre_21 = version_compare($wp_version, '2.1', '<');
		$pre_23 = version_compare($wp_version, '2.3', '<');

		//set up bits of the query string
		$now = '';
		if ($pre_21) {
			$now = gmdate("Y-m-d H:i:s", (int) (time() + ((float) get_option('gmt_offset') * 3600)));
			$showpage = ($this->p['show_pages']=='true') ? "AND post_status IN ('publish', 'static') " : "AND post_status IN ('publish') ";
		} else {
			$showpage = ($this->p['show_pages']=='true') ? "AND post_status IN ('publish') AND post_type IN ('page', 'post') " : "AND post_status IN ('publish') AND post_type IN ('post') ";
		}
		$showpass = ($this->p['show_private']=='false') ? "AND post_password ='' " : "";
		$tag_str = $this->p['tag_str'];

		// Every ID list is reduced to plain integers before it goes anywhere near the SQL;
		// a list with nothing usable in it counts as "not set" (the internal code '9999').
		$lists = array();
		foreach (array('excluded_cats', 'included_cats', 'excluded_authors', 'included_authors') as $key) {
			$clean = $this->clean_id_list($this->p[$key]);
			$lists[$key] = ($clean === '') ? '9999' : $clean;
		}
		$excluded_cats = $lists['excluded_cats'];
		$included_cats = $lists['included_cats'];
		$excluded_authors = $lists['excluded_authors'];
		$included_authors = $lists['included_authors'];
		$excluded_posts = $this->clean_id_list($this->p['excluded_posts']);

		$match_category = ($this->p['match_cat'] === 'true');
		$utf8 = ($this->p['utf8'] === 'true');
		$cat_ids = '9999';
		if ($match_category) {
			$current_cats = array();
			foreach ((array) get_the_category() as $cat) {
				if (isset($cat->cat_ID) && $cat->cat_ID) $current_cats[] = (int) $cat->cat_ID;
			}
			if (count($current_cats) > 0) $cat_ids = implode(',', $current_cats);
		}
		$match_tags = ($this->p['match_tags'] !== 'false' && !$pre_23);
		$match_type = $match_tags ? $this->p['match_tags'] : '';
		// integer casts keep an empty 'skip' from producing the invalid "LIMIT , n"
		$limit = max(0, (int) $this->p['skip']) . ', ' . max(0, (int) $this->p['limit']);
		$needed_fields = implode(', ', $this->p['fields']);

		//get the terms to do the matching
		//see if there's a custom field stored for this post
		$postcustom = get_post_custom_values('similarterms');
		if (empty($postcustom)) {
			//if not generate the terms on the fly
			$terms = get_similar_terms($post->post_content, $post->post_title, $utf8, $this->p['bias_title'], $this->p['num_terms']);
		} else {
			//use the stored terms
			$terms = addslashes($postcustom[0]);
		}
		$this->count_terms = substr_count($terms, ' ') + 1;

		// the workhorse...
		$sql = "SELECT DISTINCT " . $needed_fields . ", MATCH (post_title, post_content) AGAINST ( '$terms' ) as score FROM $wpdb->posts ";
		// before 2.3 categories live in their own table which has to be joined in
		if ($pre_23 && (($excluded_cats != '9999') || ($match_category) || ($included_cats != '9999'))) {
			$sql .= " LEFT JOIN $wpdb->post2cat ON ID = post_id ";
		}
		$sql .= "WHERE MATCH (post_title, post_content) AGAINST ( '$terms' ) ";
		if ($pre_21) {
			$sql .= "AND post_date <= '$now' ";
		} else {
			$sql .= "AND post_status != 'future' ";
		}
		if ($match_category) {
			if ($pre_23) {
				$sql .= "AND category_id IN ( $cat_ids ) ";
			} else {
				$ids = $this->posts_in_categories($cat_ids);
				if (count($ids) > 0) {
					$sql .= " AND $wpdb->posts.ID IN (" . $this->quoted_list($ids) . ") ";
				}
			}
		}
		if ($match_tags) {
			$tag_ids = wp_get_object_terms($post->ID, 'post_tag', array('fields' => 'ids'));
			if (is_array($tag_ids) && count($tag_ids) > 0) {
				$ids = $this->posts_with_terms($tag_ids, 'post_tag', $match_type !== 'any');
				if ($this->p['omit_current_post'] !== 'false') {
					// $ids is already free of duplicates, so this removes the current post
					// completely; if nothing else shares its tags no tag filter is added
					$i = array_search($post->ID, $ids);
					if ($i !== false) unset($ids[$i]);
				}
				if (count($ids) > 0) {
					$sql .= " AND $wpdb->posts.ID IN (" . $this->quoted_list($ids) . ") ";
				}
			}
		}
		$sql .= $showpage;
		if ($included_cats != '9999') {
			if ($pre_23) {
				$sql .=	"AND (category_id IN ( $included_cats ) && post_author NOT IN ( $excluded_authors ) ) ";
			} else {
				$ids = $this->posts_in_categories($included_cats);
				if (count($ids) > 0) {
					$sql .= " AND $wpdb->posts.ID IN (" . $this->quoted_list($ids) . ") ";
				}
			}
		}
		if ($excluded_cats != '9999') {
			if ($pre_23) {
				$sql .=	"AND (category_id NOT IN ( $excluded_cats ) && post_author NOT IN ( $excluded_authors ) ) ";
			} else {
				$ids = $this->posts_in_categories($excluded_cats);
				if (count($ids) > 0) {
					$sql .= " AND $wpdb->posts.ID NOT IN (" . $this->quoted_list($ids) . ") ";
				}
			}
		}
		if ($excluded_authors != '9999') {
			$sql .=	"AND post_author NOT IN ( $excluded_authors ) ";
		}
		if ($included_authors != '9999') {
			$sql .=	"AND post_author IN ( $included_authors ) ";
		}
		if ($excluded_posts != '') {
			$sql .= "AND ID NOT IN ( $excluded_posts ) ";
		}
		if ('' != $tag_str && !$pre_23) {
			// 'a,b' means any of the tags, 'a+b' means all of them, a bare name is a single tag
			if (strpos($tag_str, ',') !== false) {
				$names = explode(',', $tag_str);
				$match_all = false;
			} else if (strpos($tag_str, '+') !== false) {
				$names = explode('+', $tag_str);
				$match_all = true;
			} else {
				$names = array($tag_str);
				$match_all = false;
			}
			// tag names which do not exist are skipped
			$term_ids = array();
			foreach ($names as $name) {
				$name = sanitize_term_field('name', $name, 0, 'post_tag', 'db');
				$term = get_term_by('name', $name, 'post_tag');
				if (is_object($term) && isset($term->term_id)) {
					$term_ids[] = $term->term_id;
				}
			}
			$ids = $this->posts_with_terms($term_ids, 'post_tag', $match_all);
			if (count($ids) > 0) {
				$sql .= " AND $wpdb->posts.ID IN (" . $this->quoted_list($ids) . ") ";
			} else {
				// nothing carries the requested tags so nothing may match
				$sql .= " AND 1 = 2 ";
			}
		}
		if ($this->p['omit_current_post'] !== 'false') {
			$sql .= "AND ID != '" . (int) $post->ID . "' ";
		}
		$sql .= $showpass . "LIMIT $limit";
		$this->results = $wpdb->get_results($sql);
	}

	// Formats a MySQL date with $fmt (the site's date_format option when $fmt is '')
	// and passes it through the 'get_the_time' and 'the_time' filters.
	function format_date($date, $fmt) {
			if ($fmt === '') $fmt = get_option('date_format');
			$d = mysql2date($fmt, $date);
			$d = apply_filters('get_the_time', $d, $fmt);
			return apply_filters('the_time', $d, $fmt);
	}

	// Escapes a literal string for use inside a '/'-delimited regular expression.
	// preg_quote() covers every metacharacter, not just \ / [ and ].
	function regescape($s) {
			return preg_quote($s, '/');
	}

	// Removes everything between (and including) each start/end marker pair in
	// $stripcodes, an array of array('start' => ..., 'end' => ...), from $text.
	function strip_special_tags($text, $stripcodes) {
			if (!is_array($stripcodes)) return $text;
			foreach ($stripcodes as $code) {
				// an incomplete pair ends the processing, as it always has
				if (empty($code['start']) || empty($code['end'])) return $text;
				$pattern = '/('. $this->regescape($code['start']) . '(.*?)' . $this->regescape($code['end']) . ')/i';
				$stripped = preg_replace($pattern, '', $text);
				// preg_replace() returns null on error; keep the text we had
				if ($stripped !== null) $text = $stripped;
			}
			return $text;
	}

	// Builds an excerpt of at most $len words (55 when $len is empty or not a
	// positive number) from $content.
	function trim_excerpt($content, $len) {
		// taken from the wp_trim_excerpt filter
		$text = apply_filters('the_content', $content);
		$text = str_replace(']]>', ']]&gt;', $text);
		$text = strip_tags($text);
		// $len arrives as the text after the ':' of the tag, so it may be any string
		$excerpt_length = (int) $len;
		if ($excerpt_length < 1) $excerpt_length = 55;
		$words = explode(' ', $text, $excerpt_length + 1);
		if (count($words) > $excerpt_length) {
			// the last element holds the whole unsplit remainder
			array_pop($words);
			$text = implode(' ', $words);
		}
		$text = convert_smilies($text);
		return $text;
	}

	// Returns the first $len bytes of the tag-stripped content; when $trim is 'word'
	// the snippet is cut back to the last full word. $p is unused and only kept
	// for compatibility with existing callers.
	function format_snippet($content, $p, $trim, $len) {
		$len = (int) $len;
		$content = strip_tags($content);
		$content = $this->strip_special_tags($content, isset($this->p['stripcodes']) ? $this->p['stripcodes'] : null);
		$content = stripslashes($content);
		// grab a maximum number of characters
		$snippet = (string) substr($content, 0, $len);
		if ($trim == 'word' && strlen($snippet) == $len) {
			// trim back to the last full word--NB if our snippet ends on a word
			// boundary we still have to trim back to the non-word character
			// (the final 's' in the pattern makes sure we match newlines)
			$matches = array();
			preg_match('/^(.*)\W/s', $snippet, $matches);
			//if we can't get a single full word we use the full snippet
			// (we use $matches[1] because we don't want the white-space)
			if (!empty($matches[1])) $snippet = $matches[1];
		}
		return $snippet;
	}

	// Renders the query results through the output template and returns the HTML,
	// wrapped in prefix/suffix and followed by a timing comment. When there are no
	// results the none_text option is returned instead.
	function output() {
		if (!$this->results) {
			// display the blank message
			return $this->p['prefix'] . $this->p['none_text'] . $this->p['suffix'] . sprintf("<!--Similar Posts took %.3f seconds-->", $this->getmicrotime() - $this->running_time);
		}
		$output = '';
		$numtags = count($this->p['tags']);
		foreach ($this->results as $result) {
			$translations = array();
			for ($i = 0; $i < $numtags; $i++) {
				$fulltag = $this->p['fulltags'][$i];
				$tag = $this->p['tags'][$i];
				$ext = $this->p['exts'][$i];
				switch ($tag) {
				case 'score':
					$value = sprintf("%.0f", 37.0 * $result->score / $this->count_terms);
					break;
				case 'title':
					$value = apply_filters('the_title', $result->post_title);
					if ($ext) {
						$value = wordwrap($value, (int) $ext, '<br />');
					}
					break;
				case 'url':
					$value = apply_filters('the_permalink', get_permalink($result->ID));
					break;
				case 'author':
					$value = get_author_name($result->post_author);
					break;
				case 'date':
					$value = $this->format_date($result->post_date, $ext);
					break;
				case 'dateedited':
					$value = $this->format_date($result->post_modified, $ext);
					break;
				case 'excerpt':
					$value = trim($result->post_excerpt);
					if ($value == '') $value = $result->post_content;
					$value = $this->trim_excerpt($value, $ext);
					break;
				case 'snippet':
					if (!$ext) $ext = 100;
					$value = $this->format_snippet($result->post_content, $this->p, 'char', $ext);
					break;
				case 'snippetword':
					if (!$ext) $ext = 100;
					$value = $this->format_snippet($result->post_content, $this->p, 'word', $ext);
					break;
				case 'fullpost':
					$value = apply_filters('the_content', $result->post_content);
					$value = str_replace(']]>', ']]&gt;', $value);
					break;
				case 'commentcount':
					$value = $result->comment_count;
					if ($ext) {
						// ext is 'text for none:singular:plural'
						$s = explode(':', $ext);
						if (count($s) == 3) {
							if ($value == 0) $value = $s[0];
							elseif ($value == 1) $value .= ' ' . $s[1];
							else $value .= ' ' . $s[2];
						}
					}
					break;
				case 'categorylinks':
					$links = array();
					foreach ((array) get_the_category($result->ID) as $cat) {
						$links[] = '<a href="' . get_category_link($cat->cat_ID) . '" title="' . sprintf(__("View all posts in %s"), $cat->cat_name) . '" rel="category tag">'.$cat->cat_name.'</a>';
					}
					$value = implode($ext, $links);
					break;
				case 'catnames':
					$names = array();
					foreach ((array) get_the_category($result->ID) as $cat) {
						$names[] = $cat->cat_name;
					}
					$value = implode($ext, $names);
					break;
				case 'custom':
					$custom = get_post_custom($result->ID);
					// a post without that custom field simply renders as nothing
					$value = isset($custom[$ext][0]) ? $custom[$ext][0] : '';
					break;
				case 'tags':
					// get_the_tags() does not return an array for an untagged post
					$post_tags = get_the_tags($result->ID);
					$names = array();
					if (is_array($post_tags)) {
						foreach ($post_tags as $post_tag) {
							$names[] = $post_tag->name;
						}
					}
					if (!$ext) $ext = ', ';
					$value = join($ext, $names);
					break;
				case 'taglinks':
					$post_tags = get_the_tags($result->ID);
					$tag_links = array();
					if (is_array($post_tags)) {
						foreach ($post_tags as $post_tag) {
							$link = get_tag_link($post_tag->term_id);
							// leave out a tag whose link cannot be built instead of
							// abandoning the whole list with a WP_Error
							if (is_wp_error($link)) continue;
							$tag_links[] = '<a href="' . $link . '" rel="tag">' . $post_tag->name . '</a>';
						}
					}
					if (!$ext) $ext = ' ';
					$value = apply_filters('the_tags', join($ext, $tag_links));
					break;
				case 'link':
					$ttl = stripslashes(apply_filters('the_title', $result->post_title));
					$pml = apply_filters('the_permalink', get_permalink($result->ID));
					$pdt = $this->format_date($result->post_date, $ext);
					$value = "<a href=\"$pml\" rel=\"bookmark\" title=\"$pdt\">$ttl</a>";
					break;
				default:
					// does not match our list of tags
					die('an unrecognised tag has been detected in the output template');
				}
				$translations[$fulltag] = $value;
			}
			// Replace every valid tag with its value
			$output .= strtr($this->p['output_template'], $translations);
			$output .= "\n";
		}
		// If stuff is to be trimmed off the front...
		if ($this->p['trim_before'] !== '') {
			// check it is actually there...
			if (strpos($output, $this->p['trim_before']) === 0) {
				// then snip it
				$output = substr($output, strlen($this->p['trim_before']));
			}
		}
		// finally hand it all back
		return $this->p['prefix'] . $output . $this->p['suffix'] . sprintf("<!--Similar Posts took %.3f seconds-->", $this->getmicrotime() - $this->running_time);
	}

	// Returns the current Unix time in seconds as a float, including microseconds.
	function getmicrotime() {
		// microtime() gives 'usec sec'; the two parts are added in the correct order
		$parts = explode(" ", microtime());
		return ((float) $parts[0] + (float) $parts[1]);
	}

}

// Binds $overusedwords to the global scope even when this file is included from
// inside a function. The list itself is loaded by similar_posts_init() and read
// by get_similar_terms().
global $overusedwords;

/*
	Strips the HTML from a string and splits it into a list of lower-case words.
		$string the text to split
		$utf8   true to treat the text as multibyte (uses the mbstring functions when
		        available, a unicode regular expression otherwise)
	Returns an array of words; it never contains empty strings.
*/
function get_words($string, $utf8) {
	//tidy up the string a little
	$string = addslashes(strip_tags($string));
	//make an array of all the words in the string (because we have used addslashes above this will chop off apostrophised endings)
	if (!$utf8) {
		// plain ordinary PHP function
		return str_word_count(strtolower($string), 1);
	}
	// handle extended characters...
	if (function_exists('mb_strtolower')) {
		// we can do it two ways: first if the mb functions are available ...
		$encoding = mb_detect_encoding($string);
		// detection can fail and return false, which is not a usable encoding name
		if (!$encoding) $encoding = 'UTF-8';
		$string = mb_strtolower($string, $encoding);
		mb_regex_encoding("utf-8");
		$pieces = mb_split("\W+", $string);
		// mb_split() returns empty strings where the text starts or ends with a
		// separator; drop them so all three branches return the same kind of list
		$wordlist = array();
		if (is_array($pieces)) {
			foreach ($pieces as $piece) {
				if ($piece !== '') $wordlist[] = $piece;
			}
		}
		return $wordlist;
	}
	// ... a less reliable way
	$matches = array();
	preg_match_all('/\w+/u', $string, $matches);
	// the match fails outright on malformed UTF-8
	return isset($matches[0]) ? $matches[0] : array();
}

/*
	Takes a post's content and title, strips the html, and produces a space separated
	list of the most used words (common words ignored), each word repeated as often
	as it occurs: 'term1 term1 term1 term2 term2 term3'.
		$content, $title  the text to analyse
		$utf8             true to treat the text as multibyte
		$biastitle        'equal' counts the title words along with the content,
		                  'high' appends the title words to the finished list,
		                  anything else ignores the title
		$num_terms        how many distinct words to keep (at least 1)
	The returned string has been passed through addslashes().
*/
function get_similar_terms($content, $title, $utf8 = false, $biastitle = 'equal', $num_terms = 20) {
	global $overusedwords;
	$string = ($biastitle === 'equal') ? $content . ' ' . $title : $content;
	//count the words, word=>count
	$wordtable = array_count_values(get_words($string, $utf8));
	//knock out the noise words ... the $overusedwords array was loaded from a file to allow for different languages
	// (it is not an array if that file has not been loaded)
	if (is_array($overusedwords)) {
		foreach ($overusedwords as $word) {
			unset($wordtable[$word]);
		}
	}
	// knock out words of three or less characters since mysql ignores them for full text searches
	foreach (array_keys($wordtable) as $word) {
		// purely numeric words come back from array_keys() as integers
		$text = (string) $word;
		if ($utf8) {
			// count characters, not bytes
			$length = function_exists('mb_strlen') ? mb_strlen($text, 'UTF-8') : strlen(utf8_decode($text));
		} else {
			$length = strlen($text);
		}
		if ($length < 4) {
			unset($wordtable[$word]);
		}
	}
	//sort by count
	arsort($wordtable);

	//convert the most used words into a list 'term1 term1 term1 term2 term2 term3' etc.
	$num_terms = (int) $num_terms;
	if ($num_terms < 1) $num_terms = 1;
	$runs = array();
	foreach ($wordtable as $word => $count) {
		$runs[] = rtrim(str_repeat($word . ' ', $count));
		if (count($runs) >= $num_terms) break;
	}
	$terms = implode(' ', $runs);

	if ($biastitle === 'high') {
		// trim() so an empty word list or an empty title leaves no stray space behind
		$terms = trim($terms . ' ' . implode(' ', get_words($title, $utf8)));
	}
	return addslashes($terms);
}

/*
	Given the ID of a post, gets its most common words and stores them in a custom field called 'similarterms'.
	An existing 'similarterms' field is left untouched. Returns the ID it was given.
*/
function save_similar_terms($postID) {
	global $wpdb;
	// the ID goes straight into the SQL below so make sure it is an integer
	$id = (int) $postID;
	//get the post content and title
	$content = $wpdb->get_row("SELECT post_content, post_title FROM $wpdb->posts WHERE ID = $id", ARRAY_A);
	// no such post: nothing to analyse
	if (!is_array($content)) return $postID;
	//extract its terms
	$options = get_option('similar-posts');
	// get_option() returns false when the option has never been saved
	if (!is_array($options)) $options = array();
	$utf8 = isset($options['utf8']) && $options['utf8'] == 'true';
	$bias_title = isset($options['bias_title']) ? $options['bias_title'] : null;
	$num_terms = isset($options['num_terms']) ? $options['num_terms'] : null;
	$terms = get_similar_terms($content['post_content'], $content['post_title'], $utf8, $bias_title, $num_terms);
	//check to see if the field is set
	$metaid = $wpdb->get_var("SELECT meta_id FROM $wpdb->postmeta WHERE post_id=$id AND meta_key='similarterms' limit 1");
	//then insert if empty ($terms is already slashed by get_similar_terms)
	if (is_null($metaid)) {
		$wpdb->query("INSERT INTO $wpdb->postmeta (post_id, meta_key, meta_value) VALUES ($id, 'similarterms', \"$terms\")");
	}
	return $postID;
}

/*
	Given the ID of a post delete its custom field, 'similarterms'.
	Returns the ID it was given.
*/
function delete_similar_terms($postID) {
	global $wpdb;
	// the ID goes straight into the SQL below so make sure it is an integer
	$id = (int) $postID;
	// deleting is harmless when the field does not exist, so there is no need to look first
	$wpdb->query("DELETE FROM $wpdb->postmeta WHERE post_id = $id AND meta_key = 'similarterms'");
	return $postID;
}

/*
	Hooked to 'plugins_loaded': registers the actions which keep the 'similarterms'
	custom field up to date and loads the list of overused words for the site's language.
*/
function similar_posts_init() {
	// $wp_db_version has to be declared global: as an undefined local it compared
	// as less than 3308 and the legacy hooks below were always installed
	global $wp_db_version, $overusedwords;

	//install the similarterm actions into WordPress
	add_action('save_post', 'save_similar_terms', 1);
	add_action('delete_post', 'delete_similar_terms', 1);
	if ($wp_db_version < 3308 ) {
		add_action('edit_post', 'save_similar_terms', 1);
		add_action('publish_post', 'save_similar_terms', 1);
	}

	//the next lines find the language WordPress is using
	// (the constant is not defined on every installation)
	$language = defined('WPLANG') ? WPLANG : '';
	//if no language is specified make it the default which is 'en'
	if ($language == '') {
		$language = 'en';
	}
	$plugin_dir = dirname(__FILE__) . DIRECTORY_SEPARATOR;
	$overusedwordsfile = $language.'.words.php';
	//see if there is a file of overused words in that language in the right directory
	if (!file_exists($plugin_dir . $overusedwordsfile)) {
		//if not revert to the default file
		$overusedwordsfile = 'en.words.php';
	}

	//the file contains a single php command to create the $overusedwords array -- see 'en.words.php' for an example
	// it is loaded from the same absolute path which was checked above; because of the
	// global declaration at the top the array ends up in the global scope
	require_once($plugin_dir . $overusedwordsfile);
}

// similar_posts_init() is called when all plugins have loaded (priority 0)
add_action('plugins_loaded', 'similar_posts_init', 0);

// similar-posts-admin.php is only loaded for admin requests
// NOTE(review): presumably it holds the options pages -- the file is not visible here
if ( is_admin() ) {
	require(dirname(__FILE__).'/similar-posts-admin.php');
}
/*
	Hooked to 'plugins_loaded': registers the Similar Posts sidebar widget and its
	control form. Does nothing when the widget API is not available.
*/
function widget_rrm_similar_posts_init() {
	if (! function_exists("register_sidebar_widget")) {
		return;
	}
	// Displays the widget: title followed by the list of similar posts.
	// $args supplies before_widget, before_title, after_title and after_widget.
	function widget_rrm_similar_posts($args) {
		extract($args);
		$options = get_option('widget_rrm_similar_posts');
		// get_option() returns false until the widget has been saved once
		if (!is_array($options)) $options = array();
		$title = empty($options['title']) ? __('Similar Posts') : $options['title'];
		// default to 10 posts, otherwise keep the number between 1 and 15
		$number = isset($options['number']) ? (int) $options['number'] : 0;
		if ( !$number )
			$number = 10;
		else if ( $number < 1 )
			$number = 1;
		else if ( $number > 15 )
			$number = 15;
		echo $before_widget;
		echo $before_title.$title.$after_title;
		similar_posts('limit='.$number);
		echo $after_widget;
	}
	// Displays the widget's control form and saves it when it has been submitted.
	function widget_rrm_similar_posts_control() {
		if ( !empty($_POST['widget_rrm_similar_posts_submit']) ) {
			$options = array();
			$posted_title = isset($_POST['widget_rrm_similar_posts_title']) ? $_POST['widget_rrm_similar_posts_title'] : '';
			$options['title'] = strip_tags(stripslashes($posted_title));
			$options['number'] = isset($_POST["widget_rrm_similar_posts_number"]) ? (int) $_POST["widget_rrm_similar_posts_number"] : 0;
			update_option("widget_rrm_similar_posts", $options);
		} else {
			$options = get_option('widget_rrm_similar_posts');
			if (!is_array($options)) $options = array();
		}
		$title = attribute_escape(isset($options['title']) ? $options['title'] : '');
		$number = isset($options['number']) ? (int) $options['number'] : 0;
		if ( !$number )
			$number = 5;
		?>
		<p><label for="widget_rrm_similar_posts_title"> <?php _e('Title:'); ?> <input style="width: 250px;" id="widget_rrm_similar_posts_title" name="widget_rrm_similar_posts_title" type="text" value="<?php echo $title; ?>" /></label></p>
		<p><label for="widget_rrm_similar_posts_number"> <?php _e('Number of posts to show:'); ?> <input style="width: 25px; text-align: center;" id="widget_rrm_similar_posts_number" name="widget_rrm_similar_posts_number" type="text" value="<?php echo $number; ?>" /></label> <?php _e('(at most 15)'); ?> </p>
		<input type="hidden" id="widget_rrm_similar_posts_submit" name="widget_rrm_similar_posts_submit" value="1" />
		There are many more <a href="options-general.php?page=Similar_Posts/similar-posts.php">options</a> available.
		<?php
	}
	register_sidebar_widget('Similar Posts', 'widget_rrm_similar_posts');
	register_widget_control('Similar Posts', 'widget_rrm_similar_posts_control', 300, 100);
}

// widget_rrm_similar_posts_init() runs once all plugins have loaded; it returns
// early when register_sidebar_widget() does not exist
add_action('plugins_loaded', 'widget_rrm_similar_posts_init');

?>