From 4f4d22dabd279f30f3afefdacf83acf95b6dec92 Mon Sep 17 00:00:00 2001 From: Tammo Behrends Date: Sat, 21 Nov 2015 00:15:51 +0100 Subject: [PATCH] word-count: Add test case for unicode support See #177 --- word-count/example.py | 10 +++++++++- word-count/word_count_test.py | 15 +++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/word-count/example.py b/word-count/example.py index 1b5475c1927..1207bc6822d 100644 --- a/word-count/example.py +++ b/word-count/example.py @@ -1,7 +1,15 @@ from collections import Counter +# to be backwards compatible with the old Python 2.X +def decode_if_needed(string): + try: + return string.decode('utf-8') + except AttributeError: + return string + + def word_count(text): replace_nonalpha = lambda c: c.lower() if c.isalnum() else ' ' - text = ''.join(replace_nonalpha(c) for c in text) + text = ''.join(replace_nonalpha(c) for c in decode_if_needed(text)) return Counter(text.split()) diff --git a/word-count/word_count_test.py b/word-count/word_count_test.py index 13f48204aa2..1c3373c42e3 100644 --- a/word-count/word_count_test.py +++ b/word-count/word_count_test.py @@ -1,8 +1,17 @@ +# -*- coding: utf-8 -*- import unittest from wordcount import word_count +# to be backwards compatible with the old Python 2.X +def decode_if_needed(string): + try: + return string.decode('utf-8') + except AttributeError: + return string + + class WordCountTests(unittest.TestCase): def test_count_one_word(self): @@ -69,5 +78,11 @@ def test_non_alphanumeric(self): word_count('hey,my_spacebar_is_broken.') ) + def test_unicode(self): + self.assertEqual( + {decode_if_needed('до'): 1, decode_if_needed('свидания'): 1}, + word_count('до🖖свидания!') + ) + if __name__ == '__main__': unittest.main()