Skip to content

Commit e4eb0c8

Browse files
committed
updated
1 parent 13be66d commit e4eb0c8

File tree

1 file changed

+80
-95
lines changed

1 file changed

+80
-95
lines changed

data_science/nlp/word2vec_gensim.ipynb

Lines changed: 80 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"cell_type": "markdown",
55
"metadata": {},
66
"source": [
7-
"<a href=\"https://colab.research.google.com/github/minsuk-heo/python_tutorial/data_science/nlp/blob/master/jupyter_notebooks/word2vec_gensim.ipynb\" target=\"_parent\"><img src=\"https://camo.githubusercontent.com/52feade06f2fecbf006889a904d221e6a730c194/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667\" alt=\"Open In Colab\" data-canonical-src=\"https://colab.research.google.com/assets/colab-badge.svg\"></a>"
7+
"<a href=\"https://colab.research.google.com/github/minsuk-heo/python_tutorial/blob/master/data_science/nlp/word2vec_gensim.ipynb\" target=\"_parent\"><img src=\"https://camo.githubusercontent.com/52feade06f2fecbf006889a904d221e6a730c194/68747470733a2f2f636f6c61622e72657365617263682e676f6f676c652e636f6d2f6173736574732f636f6c61622d62616467652e737667\" alt=\"Open In Colab\" data-canonical-src=\"https://colab.research.google.com/assets/colab-badge.svg\"></a>"
88
]
99
},
1010
{
@@ -16,16 +16,16 @@
1616
},
1717
{
1818
"cell_type": "code",
19-
"execution_count": 16,
19+
"execution_count": 1,
2020
"metadata": {},
2121
"outputs": [
2222
{
2323
"name": "stdout",
2424
"output_type": "stream",
2525
"text": [
26-
"--2020-01-20 22:14:56-- https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz\n",
27-
"Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.216.166.53\n",
28-
"Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.216.166.53|:443... connected.\n",
26+
"--2020-06-03 23:13:08-- https://s3.amazonaws.com/dl4j-distribution/GoogleNews-vectors-negative300.bin.gz\n",
27+
"Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.16.238\n",
28+
"Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.16.238|:443... connected.\n",
2929
"HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable\n",
3030
"\n",
3131
" The file is already fully retrieved; nothing to do.\n",
@@ -39,7 +39,7 @@
3939
},
4040
{
4141
"cell_type": "code",
42-
"execution_count": 1,
42+
"execution_count": 2,
4343
"metadata": {},
4444
"outputs": [],
4545
"source": [
@@ -48,7 +48,7 @@
4848
},
4949
{
5050
"cell_type": "code",
51-
"execution_count": 2,
51+
"execution_count": 3,
5252
"metadata": {},
5353
"outputs": [],
5454
"source": [
@@ -58,20 +58,20 @@
5858
},
5959
{
6060
"cell_type": "code",
61-
"execution_count": 3,
61+
"execution_count": 4,
6262
"metadata": {},
6363
"outputs": [
6464
{
6565
"data": {
6666
"text/plain": [
67-
"[('coffees', 0.721267819404602),\n",
68-
" ('gourmet_coffee', 0.7057087421417236),\n",
69-
" ('Coffee', 0.6900454759597778),\n",
70-
" ('o_joe', 0.6891065835952759),\n",
71-
" ('Starbucks_coffee', 0.6874972581863403)]"
67+
"[('pal', 0.7476358413696289),\n",
68+
" ('friends', 0.7098034620285034),\n",
69+
" ('buddy', 0.6972494125366211),\n",
70+
" ('dear_friend', 0.6960037350654602),\n",
71+
" ('acquaintance', 0.6843010187149048)]"
7272
]
7373
},
74-
"execution_count": 3,
74+
"execution_count": 4,
7575
"metadata": {},
7676
"output_type": "execute_result"
7777
}
@@ -83,7 +83,7 @@
8383
},
8484
{
8585
"cell_type": "code",
86-
"execution_count": 7,
86+
"execution_count": 5,
8787
"metadata": {},
8888
"outputs": [
8989
{
@@ -92,7 +92,7 @@
9292
"[('queen', 0.7118192911148071)]"
9393
]
9494
},
95-
"execution_count": 7,
95+
"execution_count": 5,
9696
"metadata": {},
9797
"output_type": "execute_result"
9898
}
@@ -104,7 +104,7 @@
104104
},
105105
{
106106
"cell_type": "code",
107-
"execution_count": 5,
107+
"execution_count": 6,
108108
"metadata": {},
109109
"outputs": [
110110
{
@@ -113,7 +113,7 @@
113113
"300"
114114
]
115115
},
116-
"execution_count": 5,
116+
"execution_count": 6,
117117
"metadata": {},
118118
"output_type": "execute_result"
119119
}
@@ -125,91 +125,76 @@
125125
},
126126
{
127127
"cell_type": "code",
128-
"execution_count": 6,
128+
"execution_count": 7,
129129
"metadata": {},
130130
"outputs": [
131131
{
132132
"data": {
133133
"text/plain": [
134-
"array([-1.61132812e-01, -1.36718750e-01, -3.73046875e-01, 6.17187500e-01,\n",
135-
" 1.08398438e-01, 2.72216797e-02, 1.00097656e-01, -1.51367188e-01,\n",
136-
" -1.66015625e-02, 3.80859375e-01, 6.54296875e-02, -1.31835938e-01,\n",
137-
" 2.53906250e-01, 9.08203125e-02, 2.86865234e-02, 2.53906250e-01,\n",
138-
" -2.05078125e-01, 1.64062500e-01, 2.20703125e-01, -1.74804688e-01,\n",
139-
" -2.01171875e-01, 1.30859375e-01, -3.22265625e-02, -2.41210938e-01,\n",
140-
" -3.19824219e-02, 2.48046875e-01, -2.37304688e-01, 2.89062500e-01,\n",
141-
" 1.64794922e-02, 1.29394531e-02, 1.72119141e-02, -3.53515625e-01,\n",
142-
" -1.66992188e-01, -5.90820312e-02, -2.81250000e-01, 9.94873047e-03,\n",
143-
" -1.94091797e-02, -3.22265625e-01, 1.73339844e-02, -5.83496094e-02,\n",
144-
" -2.59765625e-01, 1.42669678e-03, 5.81054688e-02, 1.13769531e-01,\n",
145-
" -8.64257812e-02, 3.54003906e-02, -4.29687500e-01, 2.86865234e-03,\n",
146-
" 6.98852539e-03, 1.80664062e-01, -1.79687500e-01, 2.95410156e-02,\n",
147-
" -1.56250000e-01, -2.08007812e-01, -9.08203125e-02, 4.15039062e-03,\n",
148-
" 1.07421875e-01, 3.12500000e-01, -1.04980469e-01, -3.24218750e-01,\n",
149-
" -1.24023438e-01, -7.05718994e-04, -1.05957031e-01, 2.12890625e-01,\n",
150-
" 1.12304688e-01, -1.58203125e-01, -1.67968750e-01, -9.71679688e-02,\n",
151-
" 1.53320312e-01, -1.11328125e-01, 3.22265625e-01, 2.28515625e-01,\n",
152-
" 3.20312500e-01, -1.72119141e-02, -4.57031250e-01, 3.23486328e-03,\n",
153-
" -1.76757812e-01, -5.00488281e-02, 3.05175781e-02, -2.75390625e-01,\n",
154-
" -1.65039062e-01, -3.56445312e-02, 7.95898438e-02, 1.35742188e-01,\n",
155-
" -8.64257812e-02, -7.32421875e-02, 1.36718750e-01, 2.33398438e-01,\n",
156-
" 7.95898438e-02, 1.32446289e-02, -4.71191406e-02, 1.01074219e-01,\n",
157-
" 2.37304688e-01, -1.81640625e-01, -2.14843750e-01, -1.65039062e-01,\n",
158-
" -1.66015625e-02, -1.51367188e-01, 3.06640625e-01, -2.40234375e-01,\n",
159-
" -2.29492188e-01, -1.29882812e-01, 8.97216797e-03, 1.97265625e-01,\n",
160-
" 7.47070312e-02, -1.64031982e-03, 1.54296875e-01, -6.80541992e-03,\n",
161-
" -1.12304688e-01, -7.61718750e-02, -8.74023438e-02, -1.31835938e-01,\n",
162-
" -2.94921875e-01, -2.46093750e-01, 6.15234375e-02, -1.23046875e-01,\n",
163-
" -8.34960938e-02, -8.39843750e-02, -1.61132812e-02, -4.30297852e-03,\n",
164-
" -4.05273438e-02, -2.84423828e-02, 1.36718750e-01, 2.13623047e-02,\n",
165-
" -2.81250000e-01, 2.40234375e-01, -3.75976562e-02, -9.66796875e-02,\n",
166-
" 1.28906250e-01, 1.43554688e-01, -1.37695312e-01, -1.38549805e-02,\n",
167-
" -4.12597656e-02, -4.51660156e-02, -3.75976562e-02, 1.89453125e-01,\n",
168-
" 5.32226562e-02, 1.17675781e-01, -8.25195312e-02, -1.56250000e-01,\n",
169-
" 1.47460938e-01, -2.63671875e-01, -2.79296875e-01, -4.31640625e-01,\n",
170-
" -5.90820312e-02, 2.74658203e-03, 2.87109375e-01, -2.71606445e-03,\n",
171-
" -2.46093750e-01, 2.74658203e-02, -9.08203125e-02, 6.54296875e-02,\n",
172-
" -1.94335938e-01, -2.16064453e-02, 2.77343750e-01, 5.98144531e-02,\n",
173-
" 2.33154297e-02, -1.37695312e-01, -5.39062500e-01, -1.64794922e-02,\n",
174-
" -1.25976562e-01, -1.36718750e-01, 3.02734375e-02, 2.50000000e-01,\n",
175-
" 5.53131104e-04, 1.36718750e-01, 2.96875000e-01, -5.10253906e-02,\n",
176-
" 9.08203125e-02, -2.39257812e-01, 1.35742188e-01, 1.11328125e-01,\n",
177-
" 1.96289062e-01, -1.54296875e-01, -3.37890625e-01, -3.36914062e-02,\n",
178-
" -9.47265625e-02, -1.69921875e-01, -1.04003906e-01, 1.46484375e-01,\n",
179-
" 4.54101562e-02, -4.12109375e-01, -2.47070312e-01, -6.10351562e-03,\n",
180-
" 4.55078125e-01, -2.35595703e-02, 4.93164062e-02, 1.42578125e-01,\n",
181-
" 2.66113281e-02, 4.11987305e-03, -7.27539062e-02, 2.53906250e-02,\n",
182-
" -3.39355469e-02, 7.91015625e-02, 2.87109375e-01, 3.88671875e-01,\n",
183-
" -1.58691406e-02, -8.44726562e-02, -1.15722656e-01, -1.22558594e-01,\n",
184-
" -1.02050781e-01, 1.32812500e-01, 2.21679688e-01, -2.03125000e-01,\n",
185-
" 7.91015625e-02, 1.69677734e-02, 2.16796875e-01, 2.33398438e-01,\n",
186-
" -2.08984375e-01, -1.36718750e-01, -2.45117188e-01, 3.93066406e-02,\n",
187-
" -1.80664062e-01, 1.37695312e-01, 1.50390625e-01, -3.90625000e-02,\n",
188-
" -1.32812500e-01, 2.75878906e-02, -1.78710938e-01, 1.55273438e-01,\n",
189-
" 1.36718750e-01, -1.14257812e-01, -2.79296875e-01, -7.86132812e-02,\n",
190-
" 3.08593750e-01, -5.32226562e-02, -1.65039062e-01, 5.83496094e-02,\n",
191-
" 2.19726562e-01, -1.25000000e-01, 6.10351562e-02, -3.39355469e-02,\n",
192-
" -3.16406250e-01, 2.14843750e-01, -4.12597656e-02, -1.94335938e-01,\n",
193-
" 7.76367188e-02, -5.21850586e-03, 6.93359375e-02, 2.18750000e-01,\n",
194-
" 1.71875000e-01, -1.97265625e-01, 1.07910156e-01, 8.25195312e-02,\n",
195-
" 3.39355469e-02, -1.15722656e-01, -2.02941895e-03, 4.83398438e-02,\n",
196-
" 1.50390625e-01, -2.73437500e-01, -9.61914062e-02, 3.39843750e-01,\n",
197-
" 2.98828125e-01, 1.32812500e-01, -3.68652344e-02, -3.08593750e-01,\n",
198-
" 2.94189453e-02, -1.31835938e-01, -7.12890625e-02, -2.57873535e-03,\n",
199-
" -1.17187500e-01, 6.34765625e-03, -1.66992188e-01, 2.01171875e-01,\n",
200-
" -1.33789062e-01, -1.77734375e-01, -1.09863281e-01, 5.06591797e-03,\n",
201-
" -1.07910156e-01, -1.30859375e-01, -5.17578125e-02, 2.57812500e-01,\n",
202-
" 5.41992188e-02, -6.34765625e-03, 3.00598145e-03, 7.95898438e-02,\n",
203-
" -2.37304688e-01, -8.05664062e-02, 6.07910156e-02, 9.27734375e-02,\n",
204-
" 1.65039062e-01, -1.22558594e-01, 1.88476562e-01, 2.50000000e-01,\n",
205-
" -1.42578125e-01, -7.91015625e-02, -1.78710938e-01, 1.52343750e-01,\n",
206-
" -7.76367188e-02, 2.42187500e-01, 2.56347656e-02, -1.26953125e-01,\n",
207-
" -1.25000000e-01, -3.19824219e-02, -1.27929688e-01, 1.49414062e-01,\n",
208-
" -1.34277344e-02, 6.59179688e-02, 2.17773438e-01, 2.02148438e-01],\n",
134+
"array([ 0.07080078, -0.21386719, 0.15332031, 0.09423828, -0.03442383,\n",
135+
" 0.43359375, -0.16503906, -0.05786133, 0.17578125, -0.08203125,\n",
136+
" 0.24511719, -0.19335938, -0.0255127 , -0.09619141, -0.125 ,\n",
137+
" 0.02575684, 0.16796875, -0.03759766, 0.09472656, -0.04760742,\n",
138+
" 0.20605469, 0.31835938, 0.15917969, -0.17089844, 0.09033203,\n",
139+
" -0.1640625 , -0.15234375, 0.3125 , 0.06298828, -0.24902344,\n",
140+
" 0.15625 , -0.04516602, -0.12890625, -0.00686646, -0.02160645,\n",
141+
" 0.14453125, 0.2734375 , 0.12695312, 0.10742188, 0.11376953,\n",
142+
" 0.14355469, -0.00173187, 0.22851562, -0.03515625, 0.17089844,\n",
143+
" 0.04516602, -0.07958984, -0.08886719, -0.01342773, -0.09667969,\n",
144+
" -0.12597656, 0.10595703, 0.15332031, -0.03808594, 0.02246094,\n",
145+
" 0.01428223, -0.03295898, 0.20703125, -0.03417969, 0.02233887,\n",
146+
" 0.00244141, 0.13476562, -0.01403809, 0.13378906, 0.0201416 ,\n",
147+
" 0.14746094, 0.00759888, -0.18652344, 0.16113281, 0.109375 ,\n",
148+
" 0.14355469, 0.01623535, 0.01867676, 0.09179688, -0.33789062,\n",
149+
" 0.19335938, -0.29101562, -0.00860596, 0.10644531, 0.359375 ,\n",
150+
" 0.25585938, -0.03320312, 0.15625 , -0.24316406, -0.06738281,\n",
151+
" 0.09033203, -0.125 , 0.21777344, -0.02380371, -0.06445312,\n",
152+
" -0.14355469, 0.05664062, -0.12597656, 0.02172852, 0.03833008,\n",
153+
" -0.17578125, -0.08349609, 0.21386719, -0.01855469, -0.23535156,\n",
154+
" -0.14746094, -0.16113281, -0.03125 , -0.10107422, 0.07080078,\n",
155+
" 0.01135254, -0.04370117, 0.07666016, 0.16503906, 0.04541016,\n",
156+
" -0.13867188, 0.13085938, 0.13378906, -0.14453125, 0.12792969,\n",
157+
" -0.06787109, -0.04296875, -0.03369141, 0.10302734, 0.22949219,\n",
158+
" 0.14160156, -0.01153564, -0.00086212, -0.10449219, -0.03710938,\n",
159+
" 0.01928711, 0.16699219, -0.06079102, 0.09814453, 0.0703125 ,\n",
160+
" -0.39648438, -0.23242188, -0.04077148, 0.09570312, -0.0546875 ,\n",
161+
" -0.09814453, 0.09082031, 0.03588867, 0.09228516, 0.3125 ,\n",
162+
" 0.10595703, 0.18847656, -0.11230469, 0.00842285, 0.08935547,\n",
163+
" 0.04663086, -0.25 , -0.03369141, 0.03808594, -0.03710938,\n",
164+
" 0.42773438, 0.10839844, -0.01391602, -0.01965332, -0.04296875,\n",
165+
" -0.11035156, 0.0390625 , 0.04541016, -0.20019531, -0.14355469,\n",
166+
" -0.14257812, 0.03662109, 0.25 , 0.3671875 , -0.12304688,\n",
167+
" -0.0859375 , 0.24902344, -0.21582031, 0.02648926, 0.17871094,\n",
168+
" 0.29296875, 0.21582031, 0.1015625 , 0.00167084, -0.07177734,\n",
169+
" 0.03686523, 0.22851562, -0.125 , 0.17285156, 0.22265625,\n",
170+
" 0.21191406, 0.03686523, 0.09570312, -0.00344849, 0.13183594,\n",
171+
" -0.23925781, 0.00576782, 0.27148438, 0.10400391, 0.0098877 ,\n",
172+
" -0.24511719, 0.21777344, -0.03027344, 0.23046875, 0.11816406,\n",
173+
" 0.1640625 , -0.00109863, 0.00349426, -0.02197266, -0.09179688,\n",
174+
" -0.10351562, 0.06933594, -0.13476562, -0.06201172, 0.14355469,\n",
175+
" -0.10888672, -0.11328125, 0.2109375 , -0.10839844, -0.18261719,\n",
176+
" -0.06689453, -0.265625 , -0.13378906, -0.04296875, -0.17773438,\n",
177+
" 0.00689697, -0.00982666, -0.00640869, -0.12792969, 0.08203125,\n",
178+
" -0.01367188, 0.02734375, 0.12597656, -0.00772095, -0.04614258,\n",
179+
" -0.12255859, 0.16210938, 0.28320312, 0.04296875, -0.05175781,\n",
180+
" -0.16210938, 0.14648438, -0.18359375, -0.24511719, 0.22167969,\n",
181+
" 0.0546875 , -0.10302734, -0.07763672, -0.33984375, -0.05908203,\n",
182+
" -0.0022583 , -0.11962891, -0.3046875 , 0.02233887, 0.02941895,\n",
183+
" 0.37695312, -0.01721191, -0.05932617, 0.30273438, -0.13574219,\n",
184+
" 0.14746094, 0.17089844, 0.16015625, 0.21484375, 0.01013184,\n",
185+
" 0.06738281, -0.12109375, -0.12304688, -0.20117188, 0.02880859,\n",
186+
" -0.00662231, -0.20410156, 0.02001953, -0.15136719, 0.16699219,\n",
187+
" 0.14160156, -0.02331543, 0.14550781, -0.13476562, 0.04785156,\n",
188+
" 0.14160156, 0.03808594, -0.12109375, 0.02770996, -0.0123291 ,\n",
189+
" -0.20410156, -0.06445312, 0.06079102, -0.07519531, -0.28125 ,\n",
190+
" 0.18261719, -0.25390625, -0.0456543 , 0.14160156, -0.0546875 ,\n",
191+
" -0.01477051, -0.38085938, 0.14355469, 0.12255859, 0.14941406,\n",
192+
" -0.03320312, 0.19433594, -0.34375 , -0.24902344, -0.00331116,\n",
193+
" -0.05639648, -0.00079727, -0.21679688, -0.01977539, 0.10644531],\n",
209194
" dtype=float32)"
210195
]
211196
},
212-
"execution_count": 6,
197+
"execution_count": 7,
213198
"metadata": {},
214199
"output_type": "execute_result"
215200
}

0 commit comments

Comments
 (0)