|
4 | 4 | "cell_type": "markdown", |
5 | 5 | "metadata": {}, |
6 | 6 | "source": [ |
7 | | - "<a href=\"https://colab.research.google.com/github/minsuk-heo/python_tutorial/data_science/nlp/blob/master/jupyter_notebooks/word2vec_gensim.ipynb\" target=\"_parent\"><img src=\"https://camo.githubusercontent.com/52feade06f2fecbf006889a904d221e6a730c194/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667\" alt=\"Open In Colab\" data-canonical-src=\"https://colab.research.google.com/assets/colab-badge.svg\"></a>" |
| 7 | + "<a href=\"https://colab.research.google.com/github/minsuk-heo/python_tutorial/blob/master/data_science/nlp/word2vec_gensim.ipynb\" target=\"_parent\"><img src=\"https://camo.githubusercontent.com/52feade06f2fecbf006889a904d221e6a730c194/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667\" alt=\"Open In Colab\" data-canonical-src=\"https://colab.research.google.com/assets/colab-badge.svg\"></a>" |
8 | 8 | ] |
9 | 9 | }, |
10 | 10 | { |
|
16 | 16 | }, |
17 | 17 | { |
18 | 18 | "cell_type": "code", |
19 | | - "execution_count": 16, |
| 19 | + "execution_count": 1, |
20 | 20 | "metadata": {}, |
21 | 21 | "outputs": [ |
22 | 22 | { |
23 | 23 | "name": "stdout", |
24 | 24 | "output_type": "stream", |
25 | 25 | "text": [ |
26 | | - "--2020-01-20 22:14:56-- https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz\n", |
27 | | - "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.166.53\n", |
28 | | - "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.166.53|:443... connected.\n", |
| 26 | + "--2020-06-03 23:13:08-- https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz\n", |
| 27 | + "Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.16.238\n", |
| 28 | + "Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.16.238|:443... connected.\n", |
29 | 29 | "HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable\n", |
30 | 30 | "\n", |
31 | 31 | " The file is already fully retrieved; nothing to do.\n", |
|
39 | 39 | }, |
40 | 40 | { |
41 | 41 | "cell_type": "code", |
42 | | - "execution_count": 1, |
| 42 | + "execution_count": 2, |
43 | 43 | "metadata": {}, |
44 | 44 | "outputs": [], |
45 | 45 | "source": [ |
|
48 | 48 | }, |
49 | 49 | { |
50 | 50 | "cell_type": "code", |
51 | | - "execution_count": 2, |
| 51 | + "execution_count": 3, |
52 | 52 | "metadata": {}, |
53 | 53 | "outputs": [], |
54 | 54 | "source": [ |
|
58 | 58 | }, |
59 | 59 | { |
60 | 60 | "cell_type": "code", |
61 | | - "execution_count": 3, |
| 61 | + "execution_count": 4, |
62 | 62 | "metadata": {}, |
63 | 63 | "outputs": [ |
64 | 64 | { |
65 | 65 | "data": { |
66 | 66 | "text/plain": [ |
67 | | - "[('coffees', 0.721267819404602),\n", |
68 | | - " ('gourmet_coffee', 0.7057087421417236),\n", |
69 | | - " ('Coffee', 0.6900454759597778),\n", |
70 | | - " ('o_joe', 0.6891065835952759),\n", |
71 | | - " ('Starbucks_coffee', 0.6874972581863403)]" |
| 67 | + "[('pal', 0.7476358413696289),\n", |
| 68 | + " ('friends', 0.7098034620285034),\n", |
| 69 | + " ('buddy', 0.6972494125366211),\n", |
| 70 | + " ('dear_friend', 0.6960037350654602),\n", |
| 71 | + " ('acquaintance', 0.6843010187149048)]" |
72 | 72 | ] |
73 | 73 | }, |
74 | | - "execution_count": 3, |
| 74 | + "execution_count": 4, |
75 | 75 | "metadata": {}, |
76 | 76 | "output_type": "execute_result" |
77 | 77 | } |
|
83 | 83 | }, |
84 | 84 | { |
85 | 85 | "cell_type": "code", |
86 | | - "execution_count": 7, |
| 86 | + "execution_count": 5, |
87 | 87 | "metadata": {}, |
88 | 88 | "outputs": [ |
89 | 89 | { |
|
92 | 92 | "[('queen', 0.7118192911148071)]" |
93 | 93 | ] |
94 | 94 | }, |
95 | | - "execution_count": 7, |
| 95 | + "execution_count": 5, |
96 | 96 | "metadata": {}, |
97 | 97 | "output_type": "execute_result" |
98 | 98 | } |
|
104 | 104 | }, |
105 | 105 | { |
106 | 106 | "cell_type": "code", |
107 | | - "execution_count": 5, |
| 107 | + "execution_count": 6, |
108 | 108 | "metadata": {}, |
109 | 109 | "outputs": [ |
110 | 110 | { |
|
113 | 113 | "300" |
114 | 114 | ] |
115 | 115 | }, |
116 | | - "execution_count": 5, |
| 116 | + "execution_count": 6, |
117 | 117 | "metadata": {}, |
118 | 118 | "output_type": "execute_result" |
119 | 119 | } |
|
125 | 125 | }, |
126 | 126 | { |
127 | 127 | "cell_type": "code", |
128 | | - "execution_count": 6, |
| 128 | + "execution_count": 7, |
129 | 129 | "metadata": {}, |
130 | 130 | "outputs": [ |
131 | 131 | { |
132 | 132 | "data": { |
133 | 133 | "text/plain": [ |
134 | | - "array([-1.61132812e-01, -1.36718750e-01, -3.73046875e-01, 6.17187500e-01,\n", |
135 | | - " 1.08398438e-01, 2.72216797e-02, 1.00097656e-01, -1.51367188e-01,\n", |
136 | | - " -1.66015625e-02, 3.80859375e-01, 6.54296875e-02, -1.31835938e-01,\n", |
137 | | - " 2.53906250e-01, 9.08203125e-02, 2.86865234e-02, 2.53906250e-01,\n", |
138 | | - " -2.05078125e-01, 1.64062500e-01, 2.20703125e-01, -1.74804688e-01,\n", |
139 | | - " -2.01171875e-01, 1.30859375e-01, -3.22265625e-02, -2.41210938e-01,\n", |
140 | | - " -3.19824219e-02, 2.48046875e-01, -2.37304688e-01, 2.89062500e-01,\n", |
141 | | - " 1.64794922e-02, 1.29394531e-02, 1.72119141e-02, -3.53515625e-01,\n", |
142 | | - " -1.66992188e-01, -5.90820312e-02, -2.81250000e-01, 9.94873047e-03,\n", |
143 | | - " -1.94091797e-02, -3.22265625e-01, 1.73339844e-02, -5.83496094e-02,\n", |
144 | | - " -2.59765625e-01, 1.42669678e-03, 5.81054688e-02, 1.13769531e-01,\n", |
145 | | - " -8.64257812e-02, 3.54003906e-02, -4.29687500e-01, 2.86865234e-03,\n", |
146 | | - " 6.98852539e-03, 1.80664062e-01, -1.79687500e-01, 2.95410156e-02,\n", |
147 | | - " -1.56250000e-01, -2.08007812e-01, -9.08203125e-02, 4.15039062e-03,\n", |
148 | | - " 1.07421875e-01, 3.12500000e-01, -1.04980469e-01, -3.24218750e-01,\n", |
149 | | - " -1.24023438e-01, -7.05718994e-04, -1.05957031e-01, 2.12890625e-01,\n", |
150 | | - " 1.12304688e-01, -1.58203125e-01, -1.67968750e-01, -9.71679688e-02,\n", |
151 | | - " 1.53320312e-01, -1.11328125e-01, 3.22265625e-01, 2.28515625e-01,\n", |
152 | | - " 3.20312500e-01, -1.72119141e-02, -4.57031250e-01, 3.23486328e-03,\n", |
153 | | - " -1.76757812e-01, -5.00488281e-02, 3.05175781e-02, -2.75390625e-01,\n", |
154 | | - " -1.65039062e-01, -3.56445312e-02, 7.95898438e-02, 1.35742188e-01,\n", |
155 | | - " -8.64257812e-02, -7.32421875e-02, 1.36718750e-01, 2.33398438e-01,\n", |
156 | | - " 7.95898438e-02, 1.32446289e-02, -4.71191406e-02, 1.01074219e-01,\n", |
157 | | - " 2.37304688e-01, -1.81640625e-01, -2.14843750e-01, -1.65039062e-01,\n", |
158 | | - " -1.66015625e-02, -1.51367188e-01, 3.06640625e-01, -2.40234375e-01,\n", |
159 | | - " -2.29492188e-01, -1.29882812e-01, 8.97216797e-03, 1.97265625e-01,\n", |
160 | | - " 7.47070312e-02, -1.64031982e-03, 1.54296875e-01, -6.80541992e-03,\n", |
161 | | - " -1.12304688e-01, -7.61718750e-02, -8.74023438e-02, -1.31835938e-01,\n", |
162 | | - " -2.94921875e-01, -2.46093750e-01, 6.15234375e-02, -1.23046875e-01,\n", |
163 | | - " -8.34960938e-02, -8.39843750e-02, -1.61132812e-02, -4.30297852e-03,\n", |
164 | | - " -4.05273438e-02, -2.84423828e-02, 1.36718750e-01, 2.13623047e-02,\n", |
165 | | - " -2.81250000e-01, 2.40234375e-01, -3.75976562e-02, -9.66796875e-02,\n", |
166 | | - " 1.28906250e-01, 1.43554688e-01, -1.37695312e-01, -1.38549805e-02,\n", |
167 | | - " -4.12597656e-02, -4.51660156e-02, -3.75976562e-02, 1.89453125e-01,\n", |
168 | | - " 5.32226562e-02, 1.17675781e-01, -8.25195312e-02, -1.56250000e-01,\n", |
169 | | - " 1.47460938e-01, -2.63671875e-01, -2.79296875e-01, -4.31640625e-01,\n", |
170 | | - " -5.90820312e-02, 2.74658203e-03, 2.87109375e-01, -2.71606445e-03,\n", |
171 | | - " -2.46093750e-01, 2.74658203e-02, -9.08203125e-02, 6.54296875e-02,\n", |
172 | | - " -1.94335938e-01, -2.16064453e-02, 2.77343750e-01, 5.98144531e-02,\n", |
173 | | - " 2.33154297e-02, -1.37695312e-01, -5.39062500e-01, -1.64794922e-02,\n", |
174 | | - " -1.25976562e-01, -1.36718750e-01, 3.02734375e-02, 2.50000000e-01,\n", |
175 | | - " 5.53131104e-04, 1.36718750e-01, 2.96875000e-01, -5.10253906e-02,\n", |
176 | | - " 9.08203125e-02, -2.39257812e-01, 1.35742188e-01, 1.11328125e-01,\n", |
177 | | - " 1.96289062e-01, -1.54296875e-01, -3.37890625e-01, -3.36914062e-02,\n", |
178 | | - " -9.47265625e-02, -1.69921875e-01, -1.04003906e-01, 1.46484375e-01,\n", |
179 | | - " 4.54101562e-02, -4.12109375e-01, -2.47070312e-01, -6.10351562e-03,\n", |
180 | | - " 4.55078125e-01, -2.35595703e-02, 4.93164062e-02, 1.42578125e-01,\n", |
181 | | - " 2.66113281e-02, 4.11987305e-03, -7.27539062e-02, 2.53906250e-02,\n", |
182 | | - " -3.39355469e-02, 7.91015625e-02, 2.87109375e-01, 3.88671875e-01,\n", |
183 | | - " -1.58691406e-02, -8.44726562e-02, -1.15722656e-01, -1.22558594e-01,\n", |
184 | | - " -1.02050781e-01, 1.32812500e-01, 2.21679688e-01, -2.03125000e-01,\n", |
185 | | - " 7.91015625e-02, 1.69677734e-02, 2.16796875e-01, 2.33398438e-01,\n", |
186 | | - " -2.08984375e-01, -1.36718750e-01, -2.45117188e-01, 3.93066406e-02,\n", |
187 | | - " -1.80664062e-01, 1.37695312e-01, 1.50390625e-01, -3.90625000e-02,\n", |
188 | | - " -1.32812500e-01, 2.75878906e-02, -1.78710938e-01, 1.55273438e-01,\n", |
189 | | - " 1.36718750e-01, -1.14257812e-01, -2.79296875e-01, -7.86132812e-02,\n", |
190 | | - " 3.08593750e-01, -5.32226562e-02, -1.65039062e-01, 5.83496094e-02,\n", |
191 | | - " 2.19726562e-01, -1.25000000e-01, 6.10351562e-02, -3.39355469e-02,\n", |
192 | | - " -3.16406250e-01, 2.14843750e-01, -4.12597656e-02, -1.94335938e-01,\n", |
193 | | - " 7.76367188e-02, -5.21850586e-03, 6.93359375e-02, 2.18750000e-01,\n", |
194 | | - " 1.71875000e-01, -1.97265625e-01, 1.07910156e-01, 8.25195312e-02,\n", |
195 | | - " 3.39355469e-02, -1.15722656e-01, -2.02941895e-03, 4.83398438e-02,\n", |
196 | | - " 1.50390625e-01, -2.73437500e-01, -9.61914062e-02, 3.39843750e-01,\n", |
197 | | - " 2.98828125e-01, 1.32812500e-01, -3.68652344e-02, -3.08593750e-01,\n", |
198 | | - " 2.94189453e-02, -1.31835938e-01, -7.12890625e-02, -2.57873535e-03,\n", |
199 | | - " -1.17187500e-01, 6.34765625e-03, -1.66992188e-01, 2.01171875e-01,\n", |
200 | | - " -1.33789062e-01, -1.77734375e-01, -1.09863281e-01, 5.06591797e-03,\n", |
201 | | - " -1.07910156e-01, -1.30859375e-01, -5.17578125e-02, 2.57812500e-01,\n", |
202 | | - " 5.41992188e-02, -6.34765625e-03, 3.00598145e-03, 7.95898438e-02,\n", |
203 | | - " -2.37304688e-01, -8.05664062e-02, 6.07910156e-02, 9.27734375e-02,\n", |
204 | | - " 1.65039062e-01, -1.22558594e-01, 1.88476562e-01, 2.50000000e-01,\n", |
205 | | - " -1.42578125e-01, -7.91015625e-02, -1.78710938e-01, 1.52343750e-01,\n", |
206 | | - " -7.76367188e-02, 2.42187500e-01, 2.56347656e-02, -1.26953125e-01,\n", |
207 | | - " -1.25000000e-01, -3.19824219e-02, -1.27929688e-01, 1.49414062e-01,\n", |
208 | | - " -1.34277344e-02, 6.59179688e-02, 2.17773438e-01, 2.02148438e-01],\n", |
| 134 | + "array([ 0.07080078, -0.21386719, 0.15332031, 0.09423828, -0.03442383,\n", |
| 135 | + " 0.43359375, -0.16503906, -0.05786133, 0.17578125, -0.08203125,\n", |
| 136 | + " 0.24511719, -0.19335938, -0.0255127 , -0.09619141, -0.125 ,\n", |
| 137 | + " 0.02575684, 0.16796875, -0.03759766, 0.09472656, -0.04760742,\n", |
| 138 | + " 0.20605469, 0.31835938, 0.15917969, -0.17089844, 0.09033203,\n", |
| 139 | + " -0.1640625 , -0.15234375, 0.3125 , 0.06298828, -0.24902344,\n", |
| 140 | + " 0.15625 , -0.04516602, -0.12890625, -0.00686646, -0.02160645,\n", |
| 141 | + " 0.14453125, 0.2734375 , 0.12695312, 0.10742188, 0.11376953,\n", |
| 142 | + " 0.14355469, -0.00173187, 0.22851562, -0.03515625, 0.17089844,\n", |
| 143 | + " 0.04516602, -0.07958984, -0.08886719, -0.01342773, -0.09667969,\n", |
| 144 | + " -0.12597656, 0.10595703, 0.15332031, -0.03808594, 0.02246094,\n", |
| 145 | + " 0.01428223, -0.03295898, 0.20703125, -0.03417969, 0.02233887,\n", |
| 146 | + " 0.00244141, 0.13476562, -0.01403809, 0.13378906, 0.0201416 ,\n", |
| 147 | + " 0.14746094, 0.00759888, -0.18652344, 0.16113281, 0.109375 ,\n", |
| 148 | + " 0.14355469, 0.01623535, 0.01867676, 0.09179688, -0.33789062,\n", |
| 149 | + " 0.19335938, -0.29101562, -0.00860596, 0.10644531, 0.359375 ,\n", |
| 150 | + " 0.25585938, -0.03320312, 0.15625 , -0.24316406, -0.06738281,\n", |
| 151 | + " 0.09033203, -0.125 , 0.21777344, -0.02380371, -0.06445312,\n", |
| 152 | + " -0.14355469, 0.05664062, -0.12597656, 0.02172852, 0.03833008,\n", |
| 153 | + " -0.17578125, -0.08349609, 0.21386719, -0.01855469, -0.23535156,\n", |
| 154 | + " -0.14746094, -0.16113281, -0.03125 , -0.10107422, 0.07080078,\n", |
| 155 | + " 0.01135254, -0.04370117, 0.07666016, 0.16503906, 0.04541016,\n", |
| 156 | + " -0.13867188, 0.13085938, 0.13378906, -0.14453125, 0.12792969,\n", |
| 157 | + " -0.06787109, -0.04296875, -0.03369141, 0.10302734, 0.22949219,\n", |
| 158 | + " 0.14160156, -0.01153564, -0.00086212, -0.10449219, -0.03710938,\n", |
| 159 | + " 0.01928711, 0.16699219, -0.06079102, 0.09814453, 0.0703125 ,\n", |
| 160 | + " -0.39648438, -0.23242188, -0.04077148, 0.09570312, -0.0546875 ,\n", |
| 161 | + " -0.09814453, 0.09082031, 0.03588867, 0.09228516, 0.3125 ,\n", |
| 162 | + " 0.10595703, 0.18847656, -0.11230469, 0.00842285, 0.08935547,\n", |
| 163 | + " 0.04663086, -0.25 , -0.03369141, 0.03808594, -0.03710938,\n", |
| 164 | + " 0.42773438, 0.10839844, -0.01391602, -0.01965332, -0.04296875,\n", |
| 165 | + " -0.11035156, 0.0390625 , 0.04541016, -0.20019531, -0.14355469,\n", |
| 166 | + " -0.14257812, 0.03662109, 0.25 , 0.3671875 , -0.12304688,\n", |
| 167 | + " -0.0859375 , 0.24902344, -0.21582031, 0.02648926, 0.17871094,\n", |
| 168 | + " 0.29296875, 0.21582031, 0.1015625 , 0.00167084, -0.07177734,\n", |
| 169 | + " 0.03686523, 0.22851562, -0.125 , 0.17285156, 0.22265625,\n", |
| 170 | + " 0.21191406, 0.03686523, 0.09570312, -0.00344849, 0.13183594,\n", |
| 171 | + " -0.23925781, 0.00576782, 0.27148438, 0.10400391, 0.0098877 ,\n", |
| 172 | + " -0.24511719, 0.21777344, -0.03027344, 0.23046875, 0.11816406,\n", |
| 173 | + " 0.1640625 , -0.00109863, 0.00349426, -0.02197266, -0.09179688,\n", |
| 174 | + " -0.10351562, 0.06933594, -0.13476562, -0.06201172, 0.14355469,\n", |
| 175 | + " -0.10888672, -0.11328125, 0.2109375 , -0.10839844, -0.18261719,\n", |
| 176 | + " -0.06689453, -0.265625 , -0.13378906, -0.04296875, -0.17773438,\n", |
| 177 | + " 0.00689697, -0.00982666, -0.00640869, -0.12792969, 0.08203125,\n", |
| 178 | + " -0.01367188, 0.02734375, 0.12597656, -0.00772095, -0.04614258,\n", |
| 179 | + " -0.12255859, 0.16210938, 0.28320312, 0.04296875, -0.05175781,\n", |
| 180 | + " -0.16210938, 0.14648438, -0.18359375, -0.24511719, 0.22167969,\n", |
| 181 | + " 0.0546875 , -0.10302734, -0.07763672, -0.33984375, -0.05908203,\n", |
| 182 | + " -0.0022583 , -0.11962891, -0.3046875 , 0.02233887, 0.02941895,\n", |
| 183 | + " 0.37695312, -0.01721191, -0.05932617, 0.30273438, -0.13574219,\n", |
| 184 | + " 0.14746094, 0.17089844, 0.16015625, 0.21484375, 0.01013184,\n", |
| 185 | + " 0.06738281, -0.12109375, -0.12304688, -0.20117188, 0.02880859,\n", |
| 186 | + " -0.00662231, -0.20410156, 0.02001953, -0.15136719, 0.16699219,\n", |
| 187 | + " 0.14160156, -0.02331543, 0.14550781, -0.13476562, 0.04785156,\n", |
| 188 | + " 0.14160156, 0.03808594, -0.12109375, 0.02770996, -0.0123291 ,\n", |
| 189 | + " -0.20410156, -0.06445312, 0.06079102, -0.07519531, -0.28125 ,\n", |
| 190 | + " 0.18261719, -0.25390625, -0.0456543 , 0.14160156, -0.0546875 ,\n", |
| 191 | + " -0.01477051, -0.38085938, 0.14355469, 0.12255859, 0.14941406,\n", |
| 192 | + " -0.03320312, 0.19433594, -0.34375 , -0.24902344, -0.00331116,\n", |
| 193 | + " -0.05639648, -0.00079727, -0.21679688, -0.01977539, 0.10644531],\n", |
209 | 194 | " dtype=float32)" |
210 | 195 | ] |
211 | 196 | }, |
212 | | - "execution_count": 6, |
| 197 | + "execution_count": 7, |
213 | 198 | "metadata": {}, |
214 | 199 | "output_type": "execute_result" |
215 | 200 | } |
|
0 commit comments