From 4771b9731dabc5edb9cfcdc9d8d7f79f8dfc2bca Mon Sep 17 00:00:00 2001 From: Tammo Behrends Date: Fri, 20 Nov 2015 20:39:19 +0100 Subject: [PATCH] word-count: Change separator from whitespace to non-alphanumeric Change the separator, as discussed in #177, to any non-alphanumeric character, as is also done in the Haskell track. --- word-count/example.py | 7 +++---- word-count/word_count_test.py | 8 +++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/word-count/example.py b/word-count/example.py index 121268dad03..1b5475c1927 100644 --- a/word-count/example.py +++ b/word-count/example.py @@ -2,7 +2,6 @@ def word_count(text): - """Return a Counter object that maps from the words contained in - the phrase to their respective counts - """ - return Counter(text.lower().split()) + replace_nonalpha = lambda c: c.lower() if c.isalnum() else ' ' + text = ''.join(replace_nonalpha(c) for c in text) + return Counter(text.split()) diff --git a/word-count/word_count_test.py b/word-count/word_count_test.py index d9ce565daaa..13f48204aa2 100644 --- a/word-count/word_count_test.py +++ b/word-count/word_count_test.py @@ -25,7 +25,7 @@ def test_count_multiple_occurences(self): def test_preserves_punctuation(self): self.assertEqual( - {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, ':': 2, 'javascript!!&@$%^&': 1}, + {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}, word_count('car : carpet as java : javascript!!&@$%^&') ) @@ -63,5 +63,11 @@ def test_tabs(self): 'want your bad romance') ) + def test_non_alphanumeric(self): + self.assertEqual( + {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}, + word_count('hey,my_spacebar_is_broken.') + ) + if __name__ == '__main__': unittest.main()