中文网页正文内容提取,基于《基于行块分布函数的通用网页正文抽取算法》实现。
go get github.com/yqingp/extractor
import (
"github.com/yqingp/extractor"
)
....
extract_worker := extractor.NewExtractor(url)
content, err := extract_worker.Extract()
if err == nil {
fmt.Println(content)
}

go run example/server.go
require 'rest_client'
RestClient.post("http://localhost:8000/work", {:url => "http://www.baidu.com"})