Research through the latest zeitgeist

<?php
// Zeitgeist: scan the most recent posts and print every word that is used
// more than $min_occurence times, most frequent first.

// Database connection settings.
// NOTE(review): credentials are hard-coded in the source; consider loading
// them from configuration that is kept out of the published code.
$DB_HOST = "localhost";
$DB_USER = "j";
$DB_PASS = "rs232c";
$DB_DBNAME = "rechos";

// TODO: build a tool to classify the words / a custom dictionary.
// Words that are too common to be interesting and are never counted.
$excluded_words = array("the","of","and","in","a","to","is","it", "via", "for", "on", "by", "at","as","be","an","are","that", "with","you","into","this","from","also","its","which","but", "some", "their", "if","or","then","any","about","other","i","s","d",
    "we","they","t","not","your","when","all","there","de","his","how","no","so","he","what","here","just","them","who","us","our","more","will","up","out","could","each","my","these","now","do","ve","where","over","m","only","own","b","many","me","very","ll","e",
    "la","les","je","o","l","y","c","until","able","en","got","p","has","was","can","have",
    "eacute","rsquo","ldquo","rdquo",
    "even","et","un","du","est","had","than","one","like","see","been","des","really","those","set","use","after","yet","seen","qui","que","quoi","quand","ou",
    "old","may","le","her","ces","am","ago",
    "new","using","would","news","made","com","through","different","re","two","being","first","well","much","such","back","go","around","read","while","get","most","lsquo","know","posted","don","good","called","every","next","org","since","top","say","both","says","something","v","x","again","change","etc","still","want","above","does","itself","last","often","r","though","too","put","run",
    "used", "taken", "should", "recent", "raby", "pm", "plankton", "originally", "makes", "having", "full", "doing", "dunne", "current", "based", "away", "apparently", "writing", "were", "under", "same", "per", "pas", "pac", "need", "inside", "behind", "against", "past", "looking", "going", "always", "ce", "think", "between", "available", "actually",
    'agrave', 'make', 'because', 'kind', 'soon', 'created', 'designed', 'powered');

// Flip the list so that exclusion checks are a constant-time key lookup.
$excluded_lookup = array_flip($excluded_words);

// Number of most recent posts to scan.
$max_element = 80;

// Minimum number of occurrences a word must exceed to be printed.
$min_occurence = 10;

// word => number of occurrences across all scanned posts.
$word_counts = array();

// The legacy mysql_* extension was removed in PHP 7, so mysqli is used.
// Reporting is switched to "return false" mode so that a database failure
// degrades to an empty word list instead of aborting the whole page.
mysqli_report(MYSQLI_REPORT_OFF);
$db = mysqli_connect($DB_HOST, $DB_USER, $DB_PASS, $DB_DBNAME);

if ($db === false) {
    error_log("zeitgeist: database connection failed: " . mysqli_connect_error());
} else {
    // Only post_content is read, so only that column is fetched.
    // The limit is cast to int because it is interpolated into the SQL text.
    $query = "SELECT post_content FROM wp_posts ORDER BY id DESC LIMIT " . (int) $max_element;
    $result = mysqli_query($db, $query);

    if ($result === false) {
        error_log("zeitgeist: query failed: " . mysqli_error($db));
    } else {
        while ($posts_result = mysqli_fetch_assoc($result)) {
            $buffer_text = strtolower(strip_tags($posts_result['post_content']));

            // NOTE(review): the original str_replace() call on this line was
            // garbled beyond recovery; it is assumed to have stripped quote
            // marks (literal or as the &quot; entity) - confirm.
            $buffer_text = str_replace(array('&quot;', '"'), '', $buffer_text);

            // Split on every run of non-letters (this replaces the removed
            // ereg_replace() + explode() pair) and drop empty tokens.
            // Tokenising each post separately keeps the last word of one
            // post from fusing with the first word of the next.
            $tokens = preg_split('/[^a-z]+/', $buffer_text, -1, PREG_SPLIT_NO_EMPTY);
            if ($tokens === false) {
                continue;
            }

            foreach ($tokens as $word) {
                if (isset($excluded_lookup[$word])) {
                    continue;
                }
                if (isset($word_counts[$word])) {
                    $word_counts[$word]++;
                } else {
                    $word_counts[$word] = 1;
                }
            }
        }
        mysqli_free_result($result);
    }
    mysqli_close($db);
}

// Parallel arrays: $data_text[$n] is a word, $data_count[$n] its frequency.
$data_text = array_keys($word_counts);
$data_count = array_values($word_counts);

// Sort by frequency, highest first; ties fall back to alphabetical order.
if (count($data_count) > 0) {
    array_multisort($data_count, SORT_DESC, $data_text);
}

foreach ($data_text as $position => $word) {
    // Counts are in descending order, so nothing after this point qualifies.
    if ($data_count[$position] <= $min_occurence) {
        break;
    }
    echo "{$word} ({$data_count[$position]}) ";
}

echo "Scan the last $max_element posts, and retrieve the words used more than $min_occurence times, providing a link to the related search.";
?>




Leave a Reply

R-Echos

Since 2004, R-Echos has been an experimental online magazine dedicated to republication; topics vary from biology to graphic design, from ecology to business. It agglomerates anything which is about art, computing, science. Its form is made out of collages of texts, links, images, references, videos and sounds - chosen with care to take part in this very personal publication.

* Electronest

  • About
  • Articles
  • Beta version
  • Categories
  • Defragmentation
  • Index
  • Monthly Archives
  • R-Echos issue 1
  • Somewhere else
  • Tags
  • Visual Index
  • Visualisation
  • Collections

  • Displaying
  • un-Realisation
  • Physical Interface
  • Augmented Reality
  • Publishing
  • Geometry
  • Visualisation
  • Recently republished | Most Read

    Subscribe in a reader

    Enter your email address:

    Delivered by FeedBurner