{"id":3244,"date":"2022-02-27T13:49:53","date_gmt":"2022-02-27T05:49:53","guid":{"rendered":"https:\/\/egonlin.com\/?p=3244"},"modified":"2022-02-27T13:49:53","modified_gmt":"2022-02-27T05:49:53","slug":"%e7%ac%ac%e4%ba%8c%e8%8a%82%ef%bc%9a%e5%9f%ba%e4%ba%8e%e5%8d%8f%e5%90%8c%e8%bf%87%e6%bb%a4%e7%9a%84%e6%8e%a8%e8%8d%90%e7%ae%97%e6%b3%95","status":"publish","type":"post","link":"https:\/\/egonlin.com\/?p=3244","title":{"rendered":"\u7b2c\u4e8c\u8282\uff1a\u57fa\u4e8e\u534f\u540c\u8fc7\u6ee4\u7684\u63a8\u8350\u7b97\u6cd5"},"content":{"rendered":"<h1>\u57fa\u4e8e\u534f\u540c\u8fc7\u6ee4\u7684\u63a8\u8350\u7b97\u6cd5<\/h1>\n<p>&emsp;&emsp;\u672c\u63a8\u8350\u7cfb\u7edf\u91c7\u7528\u4e2d\u7b49\u5927\u5c0f\u7684MovieLens\u6570\u636e\u96c6\uff0c\u8be5\u6570\u636e\u96c6\u5305\u542b6000\u591a\u7528\u6237\u5bf94000\u591a\u90e8\u7535\u5f71\u7684100\u4e07\u6761\u8bc4\u5206\u3002\u8be5\u6570\u636e\u96c6\u662f\u4e00\u4e2a\u8bc4\u5206\u6570\u636e\u96c6\uff0c\u7528\u6237\u53ef\u4ee5\u7ed9\u7535\u5f71\u8bc45\u4e2a\u4e0d\u540c\u7b49\u7ea7\u7684\u5206\u6570\uff081~5\u5206\uff09\u3002\u672c\u7bc7\u6587\u7ae0\u4e3b\u8981\u7814\u7a76\u9690\u53cd\u9988\u6570\u636e\u96c6\u4e2d\u7684TopN\u63a8\u8350\u95ee\u9898\uff0c\u56e0\u6b64\u5c06\u4f1a\u5ffd\u7565\u6570\u636e\u96c6\u4e2d\u7684\u8bc4\u5206\u8bb0\u5f55\u3002\u4e5f\u5c31\u662f\u8bf4\uff0cTopN\u63a8\u8350\u7684\u4efb\u52a1\u662f\u9884\u6d4b\u7528\u6237\u4f1a\u4e0d\u4f1a\u5bf9\u67d0\u90e8\u7535\u5f71\u8bc4\u5206\uff0c\u800c\u4e0d\u662f\u9884\u6d4b\u7528\u6237\u5728\u51c6\u5907\u5bf9\u67d0\u90e8\u7535\u5f71\u8bc4\u5206\u7684\u524d\u63d0\u4e0b\u5bf9\u7535\u5f71\u8bc4\u591a\u5c11\u5206\u3002<\/p>\n<h1>\u5b9e\u9a8c\u8bbe\u8ba1\u2014\u2014\u8bad\u7ec3\u96c6M\u6298\u4ea4\u53c9\u9a8c\u8bc1<\/h1>\n<pre><code class=\"language-python\">from numpy import random\n\ndef split_data(data, M, k, seed):\n    &quot;&quot;&quot;\n    \u5207\u5272\u8bad\u7ec3\u96c6\uff0c\u9632\u6b62\u8fc7\u62df\u5408\n    data: \u8bad\u7ec3\u96c6\n    M: 
\u5207\u5272\u8bad\u7ec3\u96c6\u7684\u4efd\u6570 \n    k: \u6d4b\u8bd5\u96c6\u7684\u7d22\u5f15\n    seed: \u968f\u673a\u6570\u79cd\u5b50\n    &quot;&quot;&quot;\n    test = []\n    train = []\n    random.seed(seed)\n\n    for user, item in data:\n        if random.randint(0, M) == k:\n            test.append([user, item])\n        else:\n            train.append([user, item])\n\n    return train, test<\/code><\/pre>\n<h1>\u8bc4\u6d4b\u6307\u6807<\/h1>\n<h2>\u51c6\u786e\u7387\/\u53ec\u56de\u7387<\/h2>\n<p>&emsp;&emsp;\u5bf9\u7528\u6237$u$\u63a8\u8350$N$\u4e2a\u7269\u54c1\uff0c\u8bb0\u4f5c$R(u)$\uff0c\u7528\u6237$u$\u5728\u6d4b\u8bd5\u96c6\u4e0a\u559c\u6b22\u7684\u7269\u54c1\u96c6\u5408\u4e3a$T(u)$\uff0c\u53ef\u4ee5\u4f7f\u7528\u51c6\u786e\u7387\/\u53ec\u56de\u7387\u8bc4\u6d4b\u63a8\u8350\u7b97\u6cd5\u7684\u7cbe\u5ea6\u3002<\/p>\n<p>&emsp;&emsp;\u51c6\u786e\u7387\u516c\u5f0f\u4e3a\uff1a<br \/>\n$$<br \/>\n\\text{Precision}=\\frac{\\sum_{u\\in{U}}|R(u)\\bigcap{T(u)}|}{\\sum_{u\\in{U}}|R(u)|}<br \/>\n$$<br \/>\n\u5176\u4e2d$R(u)$\u662f\u6839\u636e\u7528\u6237\u5728\u8bad\u7ec3\u96c6\u4e0a\u7684\u884c\u4e3a\u7ed9\u7528\u6237\u4f5c\u51fa\u7684\u63a8\u8350\u5217\u8868\u3002<\/p>\n<p>&emsp;&emsp;\u53ec\u56de\u7387\u516c\u5f0f\u4e3a\uff1a<br \/>\n$$<br \/>\n\\text{Recall}=\\frac{\\sum_{u\\in{U}}|R(u)\\bigcap{T(u)}|}{\\sum_{u\\in{U}}|T(u)|}<br \/>\n$$<br \/>\n\u5176\u4e2d$T(u)$\u662f\u7528\u6237\u5728\u6d4b\u8bd5\u96c6\u4e0a\u7684\u884c\u4e3a\u5217\u8868\u3002<\/p>\n<p>&emsp;&emsp;\u51c6\u786e\u7387\u63cf\u8ff0\u6700\u7ec8\u7684\u63a8\u8350\u5217\u8868\u4e2d\u6709\u591a\u5c11\u6bd4\u4f8b\u662f\u53d1\u751f\u8fc7\u7684\u7528\u6237-\u7269\u54c1\u8bc4\u5206\u8bb0\u5f55\uff1b\u53ec\u56de\u7387\u63cf\u8ff0\u6709\u591a\u5c11\u6bd4\u4f8b\u7684\u7528\u6237-\u7269\u54c1\u8bc4\u5206\u8bb0\u5f55\u5305\u542b\u5728\u6700\u7ec8\u7684\u63a8\u8350\u5217\u8868\u4e2d\u3002<\/p>\n<pre><code class=\"language-python\">def recall_(train, test, 
N):\n    &quot;&quot;&quot;\u8ba1\u7b97\u53ec\u56de\u7387&quot;&quot;&quot;\n    hit = 0\n    all_ = 0\n\n    for user in train.keys():\n        tu = test[user]\n        rank = get_recommendation(user, N) \n        for item, pui in rank:\n            if item in tu:\n                hit += 1\n        all_ += len(tu)\n\n    return hit\/(all_*1.)\n\ndef precision(train, test, N):\n    &quot;&quot;&quot;\u8ba1\u7b97\u51c6\u786e\u7387&quot;&quot;&quot;\n    hit = 0\n    all_ = 0 \n    for user in train.keys():\n        tu = test[user]     \n        rank = get_recommendation(user, N)     \n        for item, pui in rank:\n            if item in tu:\n                hit += 1\n        all_ += N\n\n    return hit\/(all_*1.)<\/code><\/pre>\n<h2>\u8986\u76d6\u7387<\/h2>\n<p>&emsp;&emsp;\u8986\u76d6\u7387\u53cd\u6620\u4e86\u63a8\u8350\u7b97\u6cd5\u53d1\u6398\u957f\u5c3e\u7684\u80fd\u529b\uff0c\u8986\u76d6\u7387\u8d8a\u9ad8\uff0c\u8bf4\u660e\u63a8\u8350\u7b97\u6cd5\u8d8a\u80fd\u591f\u5c06\u957f\u5c3e\u4e2d\u7684\u7269\u54c1\u63a8\u8350\u7ed9\u7528\u6237\u3002<\/p>\n<p>$$<br \/>\n\\text{Coverage}=\\frac{|\\bigcup_{u\\in{U}}R(u)|}{|I|}<br \/>\n$$<\/p>\n<pre><code class=\"language-python\">def coverage(train, test, N):\n    &quot;&quot;&quot;\u8ba1\u7b97\u8986\u76d6\u7387&quot;&quot;&quot;\n    recommend_items = set()\n    all_items = set()\n\n    for user in train.keys():\n        for item in train[user].keys():\n            all_items.add(item)\n\n        rank = get_recommendation(user, N)\n        for item, pui in rank:\n            recommend_items.add(item)\n\n    return len(recommend_items)\/len((all_items)*1.)<\/code><\/pre>\n<h2>\u65b0\u9896\u5ea6<\/h2>\n<p>&emsp;&emsp;\u5982\u679c\u63a8\u8350\u51fa\u7684\u7269\u54c1\u90fd\u5f88\u70ed\u95e8\uff0c\u8bf4\u660e\u63a8\u8350\u7684\u65b0\u9896\u5ea6\u8f83\u4f4e\uff1b\u5426\u5219\u8bf4\u660e\u63a8\u8350\u7ed3\u679c\u6bd4\u8f83\u65b0\u9896\u3002<\/p>\n<pre><code class=\"language-python\">def popularity(train, test, N):\n    
&quot;&quot;&quot;\u8ba1\u7b97\u65b0\u9896\u5ea6&quot;&quot;&quot;\n    item_popularity = dict()\n    for user, items in train.items():\n        for item in items.keys():\n            if item not in item_popularity:\n                item_popularity[item] = 0\n            item_popularity[item] += 1\n\n    ret = 0\n    n = 0\n    for user in train.keys():\n        rank = get_recommendation(user, N)\n        for item, pui in rank:\n            # \u7269\u54c1\u7684\u6d41\u884c\u5ea6\u5206\u5e03\u6ee1\u8db3\u957f\u5c3e\u5206\u5e03\uff0c\u5bf9\u6d41\u884c\u5ea6\u53d6\u5bf9\u6570\u540e\uff0c\u6d41\u884c\u5ea6\u7684\u5e73\u5747\u503c\u66f4\u7a33\u5b9a\n            ret += math.log(1+item_popularity[item])\n            n += 1\n\n    ret \/= n*1.\n\n    return ret<\/code><\/pre>\n<h1>\u57fa\u4e8e\u9886\u57df\u7684\u7b97\u6cd5<\/h1>\n<p>&emsp;&emsp;\u57fa\u4e8e\u9886\u57df\u7684\u7b97\u6cd5\u5206\u4e3a\u4e24\u5927\u7c7b\uff0c\u4e00\u7c7b\u662f\u57fa\u4e8e\u7528\u6237\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\uff0c\u53e6\u4e00\u7c7b\u662f\u57fa\u4e8e\u7269\u54c1\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\u3002<\/p>\n<h2>\u57fa\u4e8e\u7528\u6237\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5<\/h2>\n<p>&emsp;&emsp;\u5728\u4e00\u4e2a\u57fa\u4e8e\u7528\u6237\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\u7684\u5728\u7ebf\u4e2a\u6027\u5316\u63a8\u8350\u7cfb\u7edf\u4e2d\uff0c\u5f53\u4e00\u4e2a\u7528\u6237A\u9700\u8981\u4e2a\u6027\u5316\u63a8\u8350\u65f6\uff0c\u4e00\u822c\u9700\u8981\u4ee5\u4e0b\u4e24\u4e2a\u6b65\u9aa4\uff1a<\/p>\n<ol>\n<li>\u5148\u627e\u5230\u548c\u4ed6\u6709\u76f8\u4f3c\u5174\u8da3\u7684\u5176\u4ed6\u7528\u6237\uff08\u627e\u5230\u548c\u76ee\u6807\u7528\u6237\u5174\u8da3\u76f8\u4f3c\u7684\u7528\u6237\u96c6\u5408\uff09<\/li>\n<li>\u628a\u5176\u4ed6\u7528\u6237\u559c\u6b22\u7684\u4e14\u7528\u6237A\u6ca1\u6709\u542c\u8bf4\u8fc7\u7684\u7269\u54c1\u63a8\u8350\u7ed9\u7528\u6237A\uff08\u627e\u5230\u8fd9\u4e2a\u96c6\u5408\u4e2d\u7684\u7528\u6237\u559c\u6b22\u7684\u4e14\u76ee\u6807\u752
8\u6237\u6ca1\u6709\u542c\u8bf4\u8fc7\u7684\u7269\u54c1\u63a8\u8350\u7ed9\u76ee\u6807\u7528\u6237\uff09<\/li>\n<\/ol>\n<p>&emsp;&emsp;\u5bf9\u4e8e\u6b65\u9aa41\uff0c\u6211\u4eec\u9700\u8981\u8ba1\u7b97\u4e24\u4e2a\u7528\u6237\u7684\u5174\u8da3\u76f8\u4f3c\u7a0b\u5ea6\uff0c\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\u4e3b\u8981\u5229\u7528\u7528\u6237\u7684\u884c\u4e3a\u7684\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u7528\u6237\u95f4\u5174\u8da3\u7684\u76f8\u4f3c\u5ea6\uff0c\u53ef\u4ee5\u4f7f\u7528\u4ee5\u4e0b\u4e24\u79cd\u65b9\u6cd5\u8ba1\u7b97\u7528\u6237\u95f4\u7684\u5174\u8da3\u76f8\u4f3c\u5ea6\uff08\u5176\u4e2d$u$\u548c$v$\u8868\u793a\u4e0d\u540c\u7684\u7528\u6237\uff0c$N(u)$\u8868\u793a\u7528\u6237$u$\u66fe\u7ecf\u62e5\u6709\u6b63\u53cd\u9988\u7684\u7269\u54c1\u96c6\u5408\uff1b$N(v)$\u8868\u793a\u7528\u6237$v$\u66fe\u7ecf\u62e5\u6709\u6b63\u53cd\u9988\u7684\u7269\u54c1\u96c6\u5408\u3002\uff09\uff1a<\/p>\n<ol>\n<li>\n<p>Jaccard\u516c\u5f0f\uff1a<br \/>\n$$<br \/>\nw_{uv}=\\frac{|N(u)\\bigcap{N(v)}|}{|N(u)\\bigcup{N(v)}|}<br \/>\n$$<\/p>\n<\/li>\n<li>\n<p>\u4f59\u5f26\u76f8\u4f3c\u5ea6\uff1a<br \/>\n$$<br \/>\nw_{uv}=\\frac{|N(u)\\bigcap{N(v)}|}{\\sqrt{|N(u)||{N(v)}|}}<br \/>\n$$<\/p>\n<\/li>\n<\/ol>\n<h3>UserCF\u63a8\u8350\u7b97\u6cd5<\/h3>\n<p><a href=\"https:\/\/egonlin.com\/wp-content\/uploads\/2022\/02\/\u7528\u6237\u884c\u4e3a\u8bb0\u5f55\u4e3e\u4f8b.bmp\"><div class='fancybox-wrapper lazyload-container-unload' data-fancybox='post-images' href='https:\/\/egonlin.com\/wp-content\/uploads\/2022\/02\/\u7528\u6237\u884c\u4e3a\u8bb0\u5f55\u4e3e\u4f8b.bmp'><img class=\"lazyload lazyload-style-2\" src=\"data:image\/svg+xml;base64,PCEtLUFyZ29uTG9hZGluZy0tPgo8c3ZnIHdpZHRoPSIxIiBoZWlnaHQ9IjEiIHhtbG5zPSJodHRwOi8vd3d3LnczLm9yZy8yMDAwL3N2ZyIgc3Ryb2tlPSIjZmZmZmZmMDAiPjxnPjwvZz4KPC9zdmc+\"  data-original=\"https:\/\/egonlin.com\/wp-content\/uploads\/2022\/02\/\u7528\u6237\u884c\u4e3a\u8bb0\u5f55\u4e3e\u4f8b.bmp\" 
src=\"data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsQAAA7EAZUrDhsAAAANSURBVBhXYzh8+PB\/AAffA0nNPuCLAAAAAElFTkSuQmCC\" alt=\"\" \/><\/div><\/a><\/p>\n<p>&emsp;&emsp;\u5047\u8bbe\u4e0a\u56fe\u4e3a\u67d0\u4e2a\u7f51\u7ad9\u7528\u6237\u884c\u4e3a\u8bb0\u5f55\uff0cUserCF\uff08\u57fa\u4e8e\u7528\u6237\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\uff09\u65f6\u4f7f\u7528\u4f59\u5f26\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u7528\u6237A\u548c\u7528\u6237B\u7684\u5174\u8da3\u76f8\u4f3c\u5ea6\u4e3a\uff1a<br \/>\n$$<br \/>\nw_{AB} = \\frac{|\\{a,b,d\\}\\bigcap{\\{a,c\\}|}}{\\sqrt{|\\{a,b,d\\}||\\{a,c\\}|}} = \\frac{1}{\\sqrt{6}}<br \/>\n$$<\/p>\n<p>&emsp;&emsp;\u540c\u7406\u6211\u4eec\u53ef\u4ee5\u8ba1\u7b97\u51fa\u7528\u6237A\u548c\u7528\u6237C\u3001D\u7684\u76f8\u4f3c\u5ea6\u4e3a\uff1a<br \/>\n$$<br \/>\n\\begin{aligned}<br \/>\n&amp; w_{AC} = \\frac{|\\{a,b,d\\}\\bigcap{\\{b,e\\}|}}{\\sqrt{|\\{a,b,d\\}||\\{b,e\\}|}} = \\frac{1}{\\sqrt{6}} \\\\<br \/>\n&amp; w_{AD} = \\frac{|\\{a,b,d\\}\\bigcap{\\{c,d,e\\}|}}{\\sqrt{|\\{a,b,d\\}||\\{c,d,e\\}|}} = \\frac{1}{\\sqrt{9}} = \\frac{1}{3}<br \/>\n\\end{aligned}<br \/>\n$$<\/p>\n<pre><code class=\"language-python\">def user_similarity(train):\n    &quot;&quot;&quot;\u8ba1\u7b97\u7528\u6237\u95f4\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6&quot;&quot;&quot;   \n    W = dict()\n    for u in train.keys():\n        for v in train.keys():\n            if u == v:\n                continue\n            W[u][v] = len(train[u] &amp; train[v])\n            W[u][v] \/= math.sqrt(len(train[u])*len(train[v])*1.)\n\n    return 
W<\/code><\/pre>\n<p>&emsp;&emsp;\u5bf9\u4e8e\u4e0a\u9762\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u8ba1\u7b97\uff0c\u6709\u7740\u5f88\u5927\u7684\u8ba1\u7b97\u5f00\u9500\u3002\u56e0\u4e3a\u5f88\u591a\u7528\u6237\u4e4b\u95f4\u5e76\u6ca1\u6709\u5bf9\u540c\u6837\u7684\u7269\u54c1\u4ea7\u751f\u8fc7\u884c\u4e3a\uff0c\u5373\u6709\u65f6\u5019$|N(u)\\bigcap{N(v)}|=0$\u3002\u56e0\u6b64\u6211\u4eec\u53ef\u4ee5\u8003\u8651\u9996\u5148\u8ba1\u7b97$|N(u)\\bigcap{N(v)}|\\neq0$\u7684\u7528\u6237\u5bf9$(u,v)$\uff0c\u7136\u540e\u518d\u5bf9\u8fd9\u4e9b\u7528\u6237\u8fdb\u884c\u4f59\u5f26\u76f8\u4f3c\u5ea6\u8ba1\u7b97\uff0c\u5373\u4e0b\u9762\u6539\u8fdb\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u8ba1\u7b97\u3002<\/p>\n<p>&emsp;&emsp;\u6539\u8fdb\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u8ba1\u7b97\uff0c\u9700\u8981\u4ee5\u4e0b\u4e09\u4e2a\u6b65\u9aa4\uff1a<\/p>\n<ol>\n<li>\u6784\u5efa\u7269\u54c1\u5230\u7528\u6237\u7684\u5012\u6392\u8868\uff0c\u5373\u952e\u503c\u5bf9\u4e3a{\u7269\u54c1\uff1a\u5bf9\u8be5\u7269\u54c1\u4ea7\u751f\u884c\u4e3a\u7684\u7528\u6237\u5217\u8868}\u3002<\/li>\n<li>\u4ee4\u7a00\u758f\u77e9\u9635$C[u][v]=|N(u)\\bigcap{N(v)}|$\u3002\u5047\u8bbe\u7528\u6237$u$\u548c\u7528\u6237$v$\u540c\u65f6\u5c5e\u4e8e\u5012\u6392\u8868\u4e2d$K$\u4e2a\u7269\u54c1\u5bf9\u5e94\u7684\u7528\u6237\u5217\u8868\uff0c\u5219\u4f1a\u6709$C[u][v]=K$\u3002\u56e0\u6b64\u53ef\u4ee5\u626b\u63cf\u5012\u6392\u8868\u4e2d\u6bcf\u4e2a\u7269\u54c1\u5bf9\u5e94\u7684\u7528\u6237\u5217\u8868\uff0c\u5c06\u7528\u6237\u5217\u8868\u4e2d\u7684\u4e24\u4e24\u7528\u6237\u5bf9\u5e94\u7684$C[u][v]$\u52a01\u3002<\/li>\n<li>\u8ba1\u7b97$|N(u)\\bigcap{N(v)}|\\neq0$\u7684\u7528\u6237$u$\u548c\u7528\u6237$v$\u4e4b\u95f4\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u3002<\/li>\n<\/ol>\n<pre><code class=\"language-python\">def user_similarity(train):\n    &quot;&quot;&quot;\u8ba1\u7b97\u7528\u6237\u4e4b\u95f4\u7684\u76f8\u4f3c\u5ea6&quot;&quot;&quot;\n    # \u6784\u5efa\u7269\u54c1-\u7528\u6237\u7684\u5012\u6392\u8868\n    item_users = dict()\n    for u, items 
in train.items():\n        for i in items.keys():\n            if i not in item_users:\n                item_users[i] = set()\n            item_users[i].add(u)\n\n    # \u8ba1\u7b97\u7528\u6237\u4e4b\u95f4\u7684\u5171\u540c\u6709\u8fc7\u884c\u4e3a\u7684\u7269\u54c1\n    C = dict()\n    N = dict()\n    for i, users in item_users.items():\n        for u in users:\n            N[u] += 1\n            for v in users:\n                if u == v:\n                    continue\n                C[u][v] = +=1\n\n    # \u8ba1\u7b97\u6700\u540e\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u77e9\u9635W\n    W = dict()\n    for u, related_users in C.items():\n        for v, cuv in related_users.items():\n            W[u][v] = cuv\/math.sqrt(N[u]*N[v])\n\n    return W<\/code><\/pre>\n<pre><code>  File \"<ipython-input-8-8d19b8833b0f>\", line 19\n    C[u][v] = +=1\n               ^\nSyntaxError: invalid syntax<\/code><\/pre>\n<p>&emsp;&emsp;\u901a\u8fc7\u4e0a\u8ff0\u5f97\u5230\u7528\u6237\u4e4b\u95f4\u7684\u5174\u8da3\u76f8\u4f3c\u5ea6\u4e4b\u540e\uff0cUserCF\u7b97\u6cd5\u4f1a\u7ed9\u7528\u6237\u63a8\u8350\u548c\u4ed6\u5174\u8da3\u6700\u76f8\u4f3c\u7684K\u4e2a\u7528\u6237\u559c\u6b22\u7684\u7269\u54c1\u3002\u5e76\u4e14\u53ef\u4ee5\u901a\u8fc7\u4e0b\u8ff0\u516c\u5f0f\u8ba1\u7b97\u7528\u6237$u$\u5bf9\u7269\u54c1$i$\u7684\u611f\u5174\u8da3\u7a0b\u5ea6\uff1a<br \/>\n$$<br \/>\np(u,i) = \\sum<em>{v\\in{S}(u,K)\\bigcap{N(i)}}w<\/em>{uv}r<em>{vi}<br \/>\n$$<br 
\/>\n\u5176\u4e2d$S(u,K)$\u5305\u542b\u548c\u7528\u6237$u$\u5174\u8da3\u6700\u63a5\u8fd1\u7684$K$\u4e2a\u7528\u6237\uff0c$N(i)$\u662f\u5bf9\u7269\u54c1$i$\u6709\u8fc7\u884c\u4e3a\u7684\u7528\u6237\u96c6\u5408\uff0c$w<\/em>{uv}$\u662f\u7528\u6237$u$\u548c\u7528\u6237$v$\u7684\u5174\u8da3\u76f8\u4f3c\u5ea6\uff0c$r<em>{vi}$\u4ee3\u8868\u7528\u6237$v$\u5bf9\u7269\u54c1$i$\u7684\u5174\u8da3\uff0c\u56e0\u4e3a\u4f7f\u7528\u7684\u662f\u5355\u4e00\u884c\u4e3a\u7684\u9690\u53cd\u9988\u6570\u636e\uff0c\u56e0\u6b64\u6240\u6709\u7684$r<\/em>{vi}=1$\u3002<\/p>\n<pre><code class=\"language-python\">def recommend(user, train, W):\n    &quot;&quot;&quot;\u5b9e\u73b0UserCF\u7b97\u6cd5&quot;&quot;&quot;\n    rank = dict()\n    interacted_items = train[user]\n    for v, wuv in sorted(W[u].items, key=itemgetter(1), reverse=True)[0:K]:\n        for i, rvi in train[v].items():\n            if i in interacted_items:\n                # \u5728\u7ee7\u7eed\u4e4b\u524d\u6211\u4eec\u5e94\u8be5\u7b5b\u9009\u7528\u6237\u95f4\u7684\u884c\u4e3a\n            rank[i] += wuv*rvi\n\n    return rank<\/code><\/pre>\n<h3>User-IIF\u63a8\u8350\u7b97\u6cd5<\/h3>\n<p>&emsp;&emsp;\u5982\u679c\u4e24\u4e2a\u7528\u6237\u90fd\u4e70\u8fc7\u300a\u65b0\u534e\u5b57\u5178\u300b\uff0c\u5e76\u4e0d\u80fd\u8bc1\u660e\u4e24\u4e2a\u4eba\u5174\u8da3\u76f8\u4f3c\uff0c\u56e0\u4e3a\u4e2d\u56fd\u7edd\u5927\u591a\u6570\u4eba\u90fd\u4e70\u8fc7\u300a\u65b0\u534e\u5b57\u5178\u300b\uff0c\u4f46\u662f\u4e24\u4e2a\u7528\u6237\u90fd\u4e70\u8fc7\u51b7\u95e8\u5546\u54c1\uff0c\u5982\u300a\u673a\u5668\u5b66\u4e60\u300b\uff0c\u5219\u53ef\u4ee5\u8ba4\u4e3a\u4e24\u4e2a\u7528\u6237\u7684\u5174\u8da3\u76f8\u4f3c\u3002\u56e0\u6b64\u6211\u4eec\u53ef\u4ee5\u901a\u8fc7\u5982\u4e0b\u516c\u5f0f\u8ba1\u7b97\u7528\u6237\u95f4\u7684\u5174\u8da3\u76f8\u4f3c\u5ea6\uff1a<br \/>\n$$<br \/>\nw<em>{uv}=\\frac{\\sum<\/em>{i\\in{N(u)}\\bigcap{N(v)}}\\frac{1}{\\log{(1+|N(i)|)}}}{\\sqrt{|N(u)||N(v)|}}<br \/>\n$$<br 
\/>\n\u5176\u4e2d$\\frac{1}{\\log{(1+|N(i)|)}}$\u60e9\u7f5a\u4e86\u7528\u6237$u$\u548c\u7528\u6237$v$\u5171\u540c\u5174\u8da3\u5217\u8868\u4e2d\u70ed\u95e8\u7269\u54c1\u5bf9\u4ed6\u4eec\u76f8\u4f3c\u5ea6\u7684\u5f71\u54cd\u3002<\/p>\n<p>&emsp;&emsp;\u57fa\u4e8e\u4e0a\u8ff0\u7528\u6237\u95f4\u5174\u8da3\u76f8\u4f3c\u5ea6\u5219\u53ef\u4ee5\u6539\u9020UserCF\u7b97\u6cd5\u4e3aUser-IIF\u7b97\u6cd5\u3002<\/p>\n<pre><code class=\"language-python\">def user_similarity(train):\n    &quot;&quot;&quot;\u8ba1\u7b97\u7528\u6237\u4e4b\u95f4\u7684\u76f8\u4f3c\u5ea6&quot;&quot;&quot;\n    # \u6784\u5efa\u7269\u54c1-\u7528\u6237\u7684\u5012\u6392\u8868\n    item_users = dict()\n    for u, items in train.items():\n        for i in items.keys():\n            if i not in item_users:\n                item_users[i] = set()\n            item_users[i].add(u)\n\n    # \u8ba1\u7b97\u7528\u6237\u4e4b\u95f4\u7684\u5171\u540c\u6709\u8fc7\u884c\u4e3a\u7684\u7269\u54c1\n    C = dict()\n    N = dict()\n    for i, users in item_users.items():\n        for u in users:\n            N[u] += 1\n            for v in users:\n                if u == v:\n                    continue\n                C[u][v] = +=1\/math.log(1+len(users))\n\n    # \u8ba1\u7b97\u6700\u540e\u7684\u4f59\u5f26\u76f8\u4f3c\u5ea6\u77e9\u9635W\n    W = dict()\n    for u, related_users in C.items():\n        for v, cuv in related_users.items():\n            W[u][v] = cuv\/math.sqrt(N[u]*N[v])\n\n    return 
W<\/code><\/pre>\n<h2>\u57fa\u4e8e\u7269\u54c1\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5<\/h2>\n<p>&emsp;&emsp;\u57fa\u4e8e\u7269\u54c1\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\u7684\u63a8\u8350\u7cfb\u7edf\u4e3b\u8981\u5206\u4e3a\u4e24\u6b65\uff1a<\/p>\n<ol>\n<li>\u8ba1\u7b97\u7269\u54c1\u4e4b\u95f4\u7684\u76f8\u4f3c\u5ea6<\/li>\n<li>\u6839\u636e\u7269\u54c1\u7684\u76f8\u4f3c\u5ea6\u548c\u7528\u6237\u7684\u5386\u53f2\u884c\u4e3a\u7ed9\u7528\u6237\u751f\u6210\u63a8\u8350\u5217\u8868<\/li>\n<\/ol>\n<p>&emsp;&emsp;\u7531\u4e8e\u57fa\u4e8e\u7269\u54c1\u7684\u534f\u540c\u8fc7\u6ee4\u7b97\u6cd5\u7684\u601d\u60f3\u662f\u2014\u2014\u8d2d\u4e70\u4e86\u8be5\u5546\u54c1\u7684\u7528\u6237\u4e5f\u4f1a\u7ecf\u5e38\u8d2d\u4e70\u5176\u4ed6\u7684\u5546\u54c1\u3002\u6839\u636e\u8fd9\u53e5\u8bdd\uff0c\u53ef\u4ee5\u7528\u4e0b\u9762\u7684\u516c\u5f0f\u5b9a\u4e49\u7269\u54c1\u7684\u76f8\u4f3c\u5ea6\uff1a<br \/>\n$$<br \/>\nw_{ij} = \\frac{|N(i)\\bigcap{N(j)}|}{|N(i)|}<br \/>\n$$<br \/>\n\u5176\u4e2d$|N(i)|$\u662f\u559c\u6b22\u7269\u54c1$i$\u7684\u7528\u6237\u6570\uff0c$|N(i)\\bigcap{N(j)}|$\u662f\u540c\u65f6\u559c\u6b22\u7269\u54c1$i$\u548c\u7269\u54c1$j$\u7684\u7528\u6237\u6570\u3002<\/p>\n<p>&emsp;&emsp;\u5bf9\u4e8e\u4e0a\u8ff0\u7b80\u5355\u7684\u516c\u5f0f\uff0c\u5982\u679c\u7269\u54c1$j$\u5f88\u70ed\u95e8\uff0c\u5219\u8be5\u516c\u5f0f\u4f1a\u9020\u6210\u4efb\u4f55\u7269\u54c1\u90fd\u4f1a\u548c\u70ed\u95e8\u7684\u7269\u54c1\u6709\u5f88\u5927\u7684\u76f8\u4f3c\u5ea6\uff0c\u8fd9\u5bf9\u4e8e\u81f4\u529b\u4e8e\u6316\u6398\u957f\u5c3e\u4fe1\u606f\u7684\u63a8\u8350\u7cfb\u7edf\u6765\u8bf4\u4e0d\u662f\u4e00\u4e2a\u597d\u7684\u7279\u6027\uff0c\u56e0\u6b64\u53ef\u4ee5\u628a\u4e0a\u8ff0\u516c\u5f0f\u6539\u9020\u6210\uff1a<br \/>\n$$<br \/>\nw_{ij} = \\frac{|N(i)\\bigcap{N(j)}|}{\\sqrt{|N(i)||N(j)|}}<br \/>\n$$<br 
\/>\n\u5176\u4e2d$\\sqrt{|N(i)||N(j)|}$\u76f8\u5f53\u4e8e\u60e9\u7f5a\u4e86\u7269\u54c1$j$\u7684\u6743\u91cd\uff0c\u51cf\u8f7b\u4e86\u70ed\u95e8\u7269\u54c1\u4f1a\u548c\u5f88\u591a\u7269\u54c1\u76f8\u4f3c\u7684\u53ef\u80fd\u6027\u3002<\/p>\n<h2>ItemCF\u7b97\u6cd5<\/h2>\n<p>&emsp;&emsp;ItemCF\u7b97\u6cd5\u7684\u601d\u60f3\u662f\uff0c\u5047\u8bbe\u6bcf\u4e2a\u7528\u6237\u7684\u5174\u8da3\u90fd\u5c40\u9650\u5728\u67d0\u51e0\u4e2a\u65b9\u9762\u3002\u5982\u679c\u4e24\u4e2a\u7269\u54c1\u5c5e\u4e8e\u4e00\u4e2a\u7528\u6237\u7684\u5174\u8da3\u5217\u8868\uff0c\u90a3\u4e48\u8fd9\u4e24\u4e2a\u7269\u54c1\u53ef\u80fd\u5c31\u5c5e\u4e8e\u6709\u9650\u7684\u51e0\u4e2a\u9886\u57df\uff1b\u5982\u679c\u4e24\u4e2a\u7269\u54c1\u5c5e\u4e8e\u5f88\u591a\u7528\u6237\u7684\u5174\u8da3\u5217\u8868\uff0c\u5219\u5b83\u4eec\u53ef\u80fd\u5c31\u5c5e\u4e8e\u540c\u4e00\u4e2a\u9886\u57df\uff0c\u56e0\u800c\u6709\u5f88\u5927\u7684\u76f8\u4f3c\u5ea6\u3002<\/p>\n<p>&emsp;&emsp;ItemCF\u7b97\u6cd5\u7c7b\u4f3c\u4e8eUserCF\u7b97\u6cd5\uff0c\u4e5f\u9700\u8981\u5efa\u7acb\u7528\u6237-\u7269\u54c1\u5012\u6392\u8868\uff0c\u5efa\u7acb\u8be5\u8868\u6d41\u7a0b\u5982\u4e0b\uff1a<\/p>\n<ol>\n<li>\u5148\u5bf9\u6bcf\u4e2a\u7528\u6237\u5efa\u7acb\u4e00\u4e2a\u5305\u542b\u4ed6\u559c\u6b22\u7269\u54c1\u7684\u5217\u8868<\/li>\n<li>\u5bf9\u4e8e\u6bcf\u4e2a\u7528\u6237\uff0c\u5c06\u4ed6\u7269\u54c1\u5217\u8868\u4e2d\u7684\u7269\u54c1\u4e24\u4e24\u5728\u5171\u73b0\u77e9\u9635C\u4e2d\u52a01<\/li>\n<\/ol>\n<pre><code class=\"language-python\">import math\n\ndef item_similarity(train):\n    &quot;&quot;&quot;\u8ba1\u7b97\u7269\u54c1\u95f4\u7684\u76f8\u4f3c\u5ea6&quot;&quot;&quot;\n    # \u8ba1\u7b97\u7528\u6237\u5171\u540c\u62e5\u6709\u7684\u7269\u54c1\n    C = dict()\n    N = dict()\n    for u, items in train.items():\n        for i in items:\n            try:\n                N[i] += 1\n            except:\n                N[i] = 1\n            for j in items:\n                if i == j:\n                    continue\n           
     try:\n                    try:\n                        if not isinstance(C[i], dict):\n                            C[i] = dict()\n                    except:\n                        C[i] = dict()\n                    C[i][j] += 1\n                except:\n                    C[i][j] = 1\n\n    # \u8ba1\u7b97\u6700\u540e\u7684\u76f8\u4f3c\u77e9\u9635W\n    W = dict()\n    for i, related_items in C.items():\n        for j, cij in related_items.items():\n            W[i] = dict()\n            W[i][j] = cij \/ math.sqrt(N[i]*N[j])\n\n    return W\n\ntrain = {&#039;A&#039;: [&#039;a&#039;, &#039;b&#039;, &#039;d&#039;], &#039;B&#039;: [&#039;b&#039;, &#039;c&#039;, &#039;e&#039;], &#039;C&#039;: [\n    &#039;c&#039;, &#039;d&#039;], &#039;d&#039;: [&#039;b&#039;, &#039;c&#039;, &#039;d&#039;], &#039;e&#039;: [&#039;a&#039;, &#039;d&#039;]}\n\nW = item_similarity(train)<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>\u57fa\u4e8e\u534f\u540c\u8fc7\u6ee4\u7684\u63a8\u8350\u7b97\u6cd5 &emsp;&emsp;\u672c\u63a8\u8350\u7cfb\u7edf\u91c7\u7528\u4e2d\u7b49\u5927\u5c0f\u7684MovieLens\u6570\u636e\u96c6\uff0c\u8be5\u6570\u636e\u96c6\u5305\u542b 
[&hellip;]<\/p>\n","protected":false},"author":3,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[276,300],"tags":[],"_links":{"self":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3244"}],"collection":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/users\/3"}],"replies":[{"embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=3244"}],"version-history":[{"count":0,"href":"https:\/\/egonlin.com\/index.php?rest_route=\/wp\/v2\/posts\/3244\/revisions"}],"wp:attachment":[{"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=3244"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=3244"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/egonlin.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=3244"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}