「集合知プログラミング」
買いました!
データのシミュレーションや分析には興味があったので
といっても
ただ、エクセルで表とかグラフとか作るだけじゃつまんない
エンジニアとして
どや!!って言える仕事がしたい
なので、集合知プログラミングでエンジニアっぽい分析を習得しようと思います
この集合知プログラミングはPythonで書かれております。
僕は主にPHPを使って開発をしていますし
というか、PHPしかつかってないし。。
ただ単にPythonを写経するだけだと何にも身につかないと思うので
サンプルコードをPHPで書き直しながら進めていきたいと思っています。
まずはデータ・セットの作成
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Sample data set: critic name => (movie title => rating).
// A movie missing from a critic's list means that critic has not rated it.
$critics = array(
    'Lisa Rose' => array(
        'Lady in the Water' => 2.5,
        'Snakes on a Plane' => 3.5,
        'Just My Luck' => 3.0,
        'Superman Returns' => 3.5,
        'You, Me and Dupree' => 2.5,
        'The Night Listener' => 3.0,
    ),
    'Gene Seymour' => array(
        'Lady in the Water' => 3.0,
        'Snakes on a Plane' => 3.5,
        'Just My Luck' => 1.5,
        'Superman Returns' => 5.0,
        'The Night Listener' => 3.0,
        'You, Me and Dupree' => 3.5,
    ),
    'Michael Phillips' => array(
        'Lady in the Water' => 2.5,
        'Snakes on a Plane' => 3.0,
        'Superman Returns' => 3.5,
        'The Night Listener' => 4.0,
    ),
    'Claudia Puig' => array(
        'Snakes on a Plane' => 3.5,
        'Just My Luck' => 3.0,
        'The Night Listener' => 4.5,
        'Superman Returns' => 4.0,
        'You, Me and Dupree' => 2.5,
    ),
    'Mick LaSalle' => array(
        'Lady in the Water' => 3.0,
        'Snakes on a Plane' => 4.0,
        'Just My Luck' => 2.0,
        'Superman Returns' => 3.0,
        'The Night Listener' => 3.0,
        'You, Me and Dupree' => 2.0,
    ),
    'Jack Matthews' => array(
        'Lady in the Water' => 3.0,
        'Snakes on a Plane' => 4.0,
        'The Night Listener' => 3.0,
        'Superman Returns' => 5.0,
        'You, Me and Dupree' => 3.5,
    ),
    'Toby' => array(
        'Snakes on a Plane' => 4.5,
        'You, Me and Dupree' => 1.0,
        'Superman Returns' => 4.0,
    ),
);
リストの中から最も好みの似ている評価者を選び出すtopMatchesを定義。
topMatchesはTobyに似ているユーザーをarrayで返してくれるので、var_dump($result)で結果を確認できる
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Distance-based similarity score between two preference lists.
 *
 * Only items present in both lists are compared. The score is
 * 1 / (1 + sum of squared rating differences), so identical ratings
 * give 1 and larger differences push the score towards 0.
 *
 * @param array $person1 Map of item => rating for the first subject.
 * @param array $person2 Map of item => rating for the second subject.
 * @return int|float 0 when the two lists share no item, otherwise a value in (0, 1].
 */
function sim_distance($person1, $person2)
{
    $shared = 0;
    $sumOfSquares = 0;

    foreach ($person1 as $item => $rating) {
        // array_key_exists (not isset) so that a null rating still counts as "rated".
        if (!array_key_exists($item, $person2)) {
            continue;
        }
        $diff = $rating - $person2[$item];
        $sumOfSquares += $diff * $diff;
        $shared++;
    }

    // Nothing in common: no basis for a similarity score.
    if ($shared === 0) {
        return 0;
    }

    return 1 / (1 + $sumOfSquares);
}
/**
 * Pearson correlation coefficient between two preference lists.
 *
 * Only items present in both lists are used.
 *
 * @param array $p1 Map of item => rating for the first subject.
 * @param array $p2 Map of item => rating for the second subject.
 * @return int|float 0 when there are no shared items or when either side has
 *                   no variance over the shared items; otherwise the
 *                   correlation (mathematically in [-1, 1]).
 */
function sim_pearson($p1, $p2)
{
    $n = 0;
    $sum1 = $sum2 = $sum1Sq = $sum2Sq = $pSum = 0;

    foreach ($p1 as $item => $x) {
        if (!array_key_exists($item, $p2)) {
            continue;
        }
        $y = $p2[$item];

        $n++;
        $sum1 += $x;
        $sum2 += $y;
        $sum1Sq += $x * $x;
        $sum2Sq += $y * $y;
        $pSum += $x * $y;
    }

    if ($n === 0) {
        return 0;
    }

    $num = $pSum - ($sum1 * $sum2 / $n);
    $varianceProduct = ($sum1Sq - $sum1 * $sum1 / $n) * ($sum2Sq - $sum2 * $sum2 / $n);

    // Zero means at least one side gave identical ratings to every shared item.
    // Floating-point rounding can also push the product slightly below zero,
    // in which case sqrt() would yield NAN; treat both as "no correlation".
    if ($varianceProduct <= 0) {
        return 0;
    }

    return $num / sqrt($varianceProduct);
}
/**
 * Return the entries of $array most similar to $name1, best match first.
 *
 * @param array  $array  Map of subject => (item => rating).
 * @param string $name1  Key in $array to find matches for.
 * @param int    $number Maximum number of matches to return.
 * @param string $type   Similarity measure: 'pearson' or 'distance'.
 * @return array Map of other subject => similarity score, sorted descending,
 *               at most $number entries. Empty when there is nobody to compare with.
 */
function topMatches($array, $name1, $number, $type)
{
    $scores = array();

    foreach ($array as $other => $unused) {
        // Never compare the subject with itself. Compare as strings so that
        // numeric-looking keys such as '007' and '7' are not treated as equal.
        if ((string) $other === (string) $name1) {
            continue;
        }

        if ($type === 'pearson') {
            $scores[$other] = sim_pearson($array[$name1], $array[$other]);
        } elseif ($type === 'distance') {
            $scores[$other] = sim_distance($array[$name1], $array[$other]);
        } else {
            echo '正しい評価方法を指定してください';
            exit;
        }
    }

    // Highest similarity first; keys (subject names) are preserved.
    arsort($scores);

    return array_slice($scores, 0, $number, true);
}
// Show the three critics whose taste is closest to Toby's (Pearson correlation).
$result = topMatches($critics, 'Toby', 3, 'pearson');
var_dump($result);
その重みづけスコアと各評価者の評価点を掛けあわせて映画のタイトルごとにスコアを算出する
もっともスコアの高い映画が、対象とするユーザーにオススメの映画となる
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Recommend items for $person using a similarity-weighted average of the
 * ratings given by everyone else.
 *
 * Others whose similarity to $person is zero or negative are ignored.
 * Only items $person has not rated (missing, or rated 0) are scored.
 *
 * @param array  $array  Map of subject => (item => rating).
 * @param string $person Key in $array to produce recommendations for.
 * @param string $type   Similarity measure: 'pearson' or 'distance'.
 * @return array Map of item => predicted rating, sorted descending.
 */
function getRecommendations($array, $person, $type)
{
    $totals = array();   // item => sum of (rating * similarity)
    $simSums = array();  // item => sum of similarities that contributed

    foreach ($array as $other => $prefs) {
        // Skip the subject itself (string comparison avoids numeric-key juggling).
        if ((string) $other === (string) $person) {
            continue;
        }

        if ($type === 'pearson') {
            $sim = sim_pearson($array[$person], $prefs);
        } elseif ($type === 'distance') {
            $sim = sim_distance($array[$person], $prefs);
        } else {
            echo '正しい評価方法を指定してください';
            exit;
        }

        // Dissimilar or uncorrelated subjects carry no useful weight.
        if ($sim <= 0) {
            continue;
        }

        foreach ($prefs as $item => $rating) {
            // A rating of 0 is treated the same as "not rated yet".
            $alreadyRated = array_key_exists($item, $array[$person])
                && $array[$person][$item] != 0;
            if ($alreadyRated) {
                continue;
            }

            if (!isset($totals[$item])) {
                $totals[$item] = 0;
                $simSums[$item] = 0;
            }
            $totals[$item] += $rating * $sim;
            $simSums[$item] += $sim;
        }
    }

    // Normalise by the total weight so items rated by many people are not favoured.
    // Every entry in $simSums is a sum of strictly positive similarities, so it is non-zero.
    $rankings = array();
    foreach ($totals as $item => $total) {
        $rankings[$item] = $total / $simSums[$item];
    }

    arsort($rankings);

    return $rankings;
}
// Movies Toby has not rated yet, ordered by predicted rating.
$result = getRecommendations($critics, 'Toby', 'pearson');
var_dump($result);
以前に定義したtopMatches関数を使って、似た属性をもつ映画をランキング形式で表示できる。
また、映画が誰に見られるべきかをgetRecommendationsを使って表示することができる
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Invert a preference map: subject => (item => rating) becomes
 * item => (subject => rating).
 *
 * @param array $critics Map of subject => (item => rating).
 * @return array Map of item => (subject => rating), items in first-seen order.
 */
function transformPrefs($critics)
{
    $result = array();

    foreach ($critics as $name => $prefs) {
        foreach ($prefs as $item => $rating) {
            if (!isset($result[$item])) {
                $result[$item] = array();
            }
            $result[$item][$name] = $rating;
        }
    }

    return $result;
}
// Flip the data set so that movies become the subjects and critics the items.
$movies = transformPrefs($critics);

// Movies rated most similarly to "Superman Returns".
$result = topMatches($movies, 'Superman Returns', 4, 'pearson');
var_dump($result);

// Critics who have not rated "Just My Luck", ordered by how much they are predicted to like it.
$recomend_person = getRecommendations($movies, 'Just My Luck', 'pearson');
var_dump($recomend_person);
0 件のコメント:
コメントを投稿