2010/02/08(Mon) 23:29:23 編集(投稿者)
※単語をタスキ掛けでGoogleSuggestしますので、あまり長文でテストしない方が良いかもしれません。
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>日本語テキストからキーワードを抽出するテスト</title>
</head>
<script type="text/javascript" charset="UTF-8">
/**
 * Array-like collection of keyword records extracted from a text.
 *
 * Every element has the shape {keyword: string, degree: number, count: number}.
 * Instances pick up the generic Array methods (push, sort, ...) through the
 * prototype chain set up below.
 */
function KeywordArray() {}
KeywordArray.prototype = new Array();

/**
 * Appends a fresh record for a keyword unless that keyword is already stored.
 *
 * @param {string} object Keyword text to add.
 * @returns {(number|boolean)} The new length when the keyword was added,
 *     or false when it was already present.
 */
KeywordArray.prototype.mergePush = function(object) {
  for (var i = 0; i < this.length; i++) {
    if (object == this[i].keyword) return false;
  }
  return this.push({keyword: object, degree: 0, count: 0});
};

/**
 * Lower-cases the text and stores every distinct run of two or more kanji,
 * two or more katakana (including the long-vowel mark), or four or more
 * ASCII letters/digits as a keyword.
 *
 * @param {string} object Text to scan.
 */
KeywordArray.prototype.parseContent = function(object) {
  var text = object.toLowerCase();
  var reg = new RegExp('([一-龠]{2,}|[ァ-ヶー]{2,}|[0-9a-zA-Z]{4,})', 'g');
  // Kept local so the match never leaks into (or clobbers) a global `m`,
  // and so the method also works in strict-mode code.
  var match;
  while ((match = reg.exec(text)) != null) {
    this.mergePush(match[0]);
  }
};

/**
 * Issues one JSONP request per unordered pair of stored keywords.
 *
 * For each pair a pending entry {keywords: "a b", suggestion: null} is pushed
 * onto the global `suggestions` collection and a <script> element pointing at
 * the suggest endpoint is appended to the document head. The endpoint is asked
 * to invoke the global function named `callback`.
 */
KeywordArray.prototype.request = function() {
  for (var i = 0; i < this.length; i++) {
    for (var j = i + 1; j < this.length; j++) {
      var first = this[i].keyword;
      var second = this[j].keyword;
      suggestions.push({keywords: first + " " + second, suggestion: null});
      // Each keyword is percent-encoded; the literal "+" between them is the
      // query-string encoding of the separating space.
      var url = "http://suggestqueries.google.com/complete/search?hl=ja&json=t&jsonp=callback&qu=" +
          encodeURIComponent(first) + "+" + encodeURIComponent(second);
      var script = document.createElement('script');
      script.setAttribute('src', url);
      document.getElementsByTagName('head')[0].appendChild(script);
    }
  }
};

/**
 * Sorts the records in place: higher degree first, ties broken by higher
 * count.
 */
KeywordArray.prototype.customSort = function() {
  this.sort(function(a, b) {
    if (b.degree != a.degree) {
      return b.degree - a.degree;
    }
    return b.count - a.count;
  });
};

/**
 * Renders the records as an HTML fragment, one "<br>"-terminated line per
 * keyword.
 *
 * @returns {string} HTML text listing index, keyword, degree and count.
 */
KeywordArray.prototype.toString = function() {
  var s = "";
  for (var i = 0; i < this.length; i++) {
    s += "keyword[" + i + "]=" + this[i].keyword;
    s += "@degree=" + this[i].degree;
    s += "@count=" + this[i].count + "<br>";
  }
  return s;
};
/**
 * Array-like collection of suggestion lookups, one entry per keyword pair.
 *
 * Every element has the shape
 * {keywords: string, suggestion: null | Array<{suggest: Array, count: Array}>};
 * `suggestion` stays null until a response for that pair has been recorded.
 */
function SuggestionArray() {}
SuggestionArray.prototype = new Array();

/**
 * Records a JSONP response on the first entry whose `keywords` equals the
 * lower-cased query echoed back in the response.
 *
 * @param {Array} object Response payload: object[0] is the query string,
 *     object[1] the list of suggestion strings and object[2] the parallel
 *     list of count strings (format as observed from how this file consumes
 *     it; confirm against the remote service). Malformed payloads are
 *     ignored.
 */
SuggestionArray.prototype.response = function(object) {
  if (!object || typeof object[0] != "string") return;
  var query = object[0].toLowerCase();
  for (var i = 0; i < this.length; i++) {
    if (this[i].keywords == query) {
      if (!this[i].suggestion) this[i].suggestion = [];
      // Missing lists are stored as empty arrays so that parse() and
      // toString() can iterate them safely.
      this[i].suggestion.push({suggest: object[1] || [], count: object[2] || []});
      break;
    }
  }
};

/**
 * Tells whether every entry has received at least one response.
 *
 * @returns {boolean} True when no entry still has an empty `suggestion`
 *     (also true for an empty collection).
 */
SuggestionArray.prototype.receivedAllResponses = function() {
  for (var i = 0; i < this.length; i++) {
    if (!this[i].suggestion) {
      return false;
    }
  }
  return true;
};

/**
 * Scores the global `keywords` records from the recorded suggestions.
 *
 * Each suggestion is split on spaces; for every word equal (case-insensitively)
 * to a keyword, that keyword's `degree` is incremented and the suggestion's
 * count is added to its `count`. Entries without a response are skipped.
 */
SuggestionArray.prototype.parse = function() {
  for (var i = 0; i < this.length; i++) {
    var batches = this[i].suggestion;
    if (!batches) continue; // no response recorded for this pair yet
    for (var j = 0; j < batches.length; j++) {
      var texts = batches[j].suggest || [];
      var counts = batches[j].count || [];
      for (var k = 0; k < texts.length; k++) {
        var words = String(texts[k]).split(" ");
        // Keep only the digits, parse as decimal (never octal), and treat a
        // missing or digit-less count as 0 so NaN cannot poison the totals.
        var hits = parseInt(String(counts[k]).replace(/[^0-9]/g, ""), 10);
        if (isNaN(hits)) hits = 0;
        for (var l = 0; l < words.length; l++) {
          var word = words[l].toLowerCase();
          for (var m = 0; m < keywords.length; m++) {
            if (word == keywords[m].keyword) {
              keywords[m].degree++;
              keywords[m].count += hits;
            }
          }
        }
      }
    }
  }
};

/**
 * Renders all entries as an HTML fragment: a "keywords=..." line per pair
 * followed by one "suggestion@count" line per recorded suggestion.
 *
 * Text values are HTML-escaped because suggestions come from a remote
 * response and the result is meant to be inserted as markup. Entries that
 * have no response yet are listed with no suggestion lines.
 *
 * @returns {string} HTML text.
 */
SuggestionArray.prototype.toString = function() {
  // Escapes the characters that are significant in HTML text and attributes;
  // null/undefined render as an empty string.
  function escapeHtml(value) {
    if (value == null) return "";
    return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;");
  }
  var s = "";
  for (var i = 0; i < this.length; i++) {
    s += "keywords=" + escapeHtml(this[i].keywords) + "<br>";
    var batches = this[i].suggestion || [];
    for (var j = 0; j < batches.length; j++) {
      var texts = batches[j].suggest || [];
      var counts = batches[j].count || [];
      for (var k = 0; k < texts.length; k++) {
        s += escapeHtml(texts[k]) + "@";
        s += escapeHtml(counts[k]) + "<br>";
      }
    }
    s += "<br>";
  }
  return s;
};
/**
 * JSONP entry point invoked by each suggestion response script.
 *
 * Stores the response in the global `suggestions`; once every pending lookup
 * has a response, scores and sorts the global `keywords` and writes their
 * rendering into the element with id "output".
 *
 * @param {*} data Response payload handed over by the remote script.
 */
function callback(data) {
  suggestions.response(data);

  var everythingArrived = suggestions.receivedAllResponses();
  if (!everythingArrived) {
    return; // still waiting for other pairs
  }

  suggestions.parse();
  keywords.customSort();

  var outputElement = document.getElementById("output");
  outputElement.innerHTML = keywords.toString();
}
// Keyword collection for the current query; assigned a new KeywordArray by Query().
var keywords;
// Suggestion lookups for the current query; assigned a new SuggestionArray by Query().
var suggestions;
/**
 * Click handler for the query button.
 *
 * Resets the global `keywords` and `suggestions` collections, extracts
 * keywords from the "input" textarea and starts the suggestion requests.
 * The result is normally rendered later by callback(); when no request was
 * issued (fewer than two keywords) the keywords are rendered immediately.
 */
function Query() {
  var output = document.getElementById("output");
  keywords = new KeywordArray();
  suggestions = new SuggestionArray();
  // Clear() hides the output element; make it visible again so the results
  // of this query can actually be seen.
  output.style.display = "block";
  output.innerHTML = "";
  keywords.parseContent(document.getElementById("input").value);
  keywords.request();
  if (suggestions.length == 0) {
    // No keyword pair means no request, so callback() would never run and
    // nothing would ever be displayed.
    output.innerHTML = keywords.toString();
  }
}
/**
 * Click handler for the clear button: empties the "input" textarea, hides
 * the "output" element and removes its content.
 */
function Clear() {
  var inputElement = document.getElementById("input");
  var outputElement = document.getElementById("output");

  inputElement.value = "";

  outputElement.style.display = "none";
  outputElement.innerHTML = "";
}
/**
 * Click handler for the debug button: makes the "output" element visible and
 * fills it with the renderings of the global `keywords` and `suggestions`,
 * separated by "<br>".
 *
 * Before the first Query() both globals are still unset; in that case the
 * element is only made visible and its content is left untouched.
 */
function Debug() {
  var o = document.getElementById("output");
  o.style.display = "block";
  if (!keywords || !suggestions) {
    return; // nothing has been queried yet
  }
  o.innerHTML = keywords.toString() + "<br>" + suggestions.toString();
}
</script>
<body>
<h1>日本語テキストからキーワードを抽出するテスト</h1>
<div class="main">
<br>
<p></p>
<h2>デモ</h2>
<p>日本語の文章を入力し、問い合わせボタンをクリックしてください。</p>
<textarea id="input" cols="40" rows="5"></textarea>
<p>
<input type="submit" value="問い合わせ" onclick="Query()">
<input type="submit" value="クリア" onclick="Clear()">
<input type="submit" value="debug" onclick="Debug()">
</p>
<div id="output" style="width:91%; margin-left: 1.5em; border-width:1px; border: dotted 1px;"></div>
</div>
</body>
</html>