package main
import (
"fmt"
"log"
"os"
"strings"
"sync"
"./php"
"github.com/tealeg/xlsx"
)
var wg sync.WaitGroup // wg lets main wait until every Spy goroutine it started has called Done
// main reads product names from an .xlsx workbook and looks each one up with Spy.
//
// The workbook path is the first command-line argument and must contain
// ".xlsx". Log output is redirected to xxx_debug.log in the working directory.
// Every row that php.Empty does not report as empty is collected per sheet,
// then the value in column index 1 of each collected row is handed to its own
// Spy goroutine. A row with index 0 whose column 1 contains "商品名称" is treated
// as the header row and skipped. main blocks until all goroutines are done.
func main() {
	const fileName = "xxx_debug.log"
	if logFile, err := os.Create(fileName); err != nil {
		// Keep the default log destination rather than installing a nil file,
		// which would silently discard every log line.
		log.Printf("cannot create %s: %v", fileName, err)
	} else {
		defer logFile.Close()
		log.SetOutput(logFile)
	}

	argNum := len(os.Args)
	fmt.Printf("the num of input is %d ", argNum)
	if argNum < 2 || !strings.Contains(os.Args[1], ".xlsx") {
		fmt.Println("请输入****.xlsx文件作为参数")
		return
	}
	fmt.Printf("they are : ")
	for _, arg := range os.Args {
		fmt.Println(arg)
	}

	// Open the workbook; without it there is nothing to process, and f would
	// be unusable below.
	excelFilePath := os.Args[1]
	f, err := xlsx.OpenFile(excelFilePath)
	if err != nil {
		log.Println(err.Error())
		fmt.Println(err.Error())
		return
	}

	// fileResult is indexed as [sheet index][row index][column index].
	fileResult := make(map[int]map[int]map[int]string)
	for sheetKey, sheet := range f.Sheets {
		// Each sheet gets its own map. Sharing one map across sheets would
		// make every fileResult entry alias the same rows, so rows would be
		// overwritten by later sheets and dispatched once per sheet.
		sheetResult := make(map[int]map[int]string)
		for key, row := range sheet.Rows {
			rowResult := make(map[int]string)
			for k, cell := range row.Cells {
				rowResult[k] = cell.Value
			}
			// Rows reported as empty are not recorded.
			if !php.Empty(rowResult) {
				sheetResult[key] = rowResult
			}
		}
		// Sheets without any recorded row are not recorded either.
		if !php.Empty(sheetResult) {
			fileResult[sheetKey] = sheetResult
		}
	}

	// Start one lookup per collected row.
	for _, sheet := range fileResult {
		for k, row := range sheet {
			name := row[1]
			if k == 0 && strings.Contains(name, "商品名称") {
				continue // header row
			}
			log.Printf("%d=%v ", k, name)
			wg.Add(1) // must happen before the goroutine starts so Wait cannot miss it
			go Spy(name)
		}
	}

	wg.Wait() // block until every Spy goroutine has called wg.Done
	fmt.Println("WE DONE!!!")
}
// Spy searches for one product keyword and hands the listings found to buildxlsx.
//
// urls is the raw keyword. It is query-escaped and placed between tburl and
// tburlpara, and the page is requested with a User-Agent from
// GetRandomUserAgent. The text captured by atagRegExp is decoded as JSON, and
// each entry under /mods/itemlist/data/auctions contributes
// [sales, price, title, detail URL] to a map keyed by detail URL. That map,
// the query string reported by the page, and the lowest price among listings
// priced above 1 with at least one sale (9999999 if there is none) are passed
// to buildxlsx.
//
// Failures are logged and end the lookup; a single listing whose fields are
// missing or not numeric is logged and skipped. Spy always calls wg.Done
// before returning, so the caller must have called wg.Add(1).
//
// NOTE(review): http.DefaultClient has no timeout, so a stalled connection
// keeps wg.Wait in main blocked indefinitely.
func Spy(urls string) {
	// Deferred calls run last-in-first-out: a panic is recovered and logged
	// first, and only then is the wait group released, so main cannot return
	// before the log line has been written.
	defer wg.Done()
	defer func() {
		if rec := recover(); rec != nil {
			log.Println("[E]", rec)
		}
	}()

	urlpath := tburl + url.QueryEscape(urls) + tburlpara
	log.Println(urlpath)

	req, err := http.NewRequest("GET", urlpath, nil)
	if err != nil {
		log.Printf("Get请求%s返回错误:%s", urlpath, err)
		return
	}
	req.Header.Set("User-Agent", GetRandomUserAgent())

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Printf("Get请求%s返回错误:%s", urlpath, err)
		return
	}
	// Close the body on every path, not only for status 200.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		log.Printf("返回网页错误 %v", res.StatusCode)
		return
	}

	bodyByte, err := ioutil.ReadAll(res.Body)
	if err != nil {
		log.Printf("reading body of %s failed: %v", urlpath, err)
		return
	}

	// match[1] is the text captured by the group in atagRegExp.
	match := atagRegExp.FindStringSubmatch(string(bodyByte))
	if len(match) < 2 {
		log.Printf("page config not found in %s", urlpath)
		return
	}

	var v interface{}
	if err := json.Unmarshal([]byte(match[1]), &v); err != nil {
		log.Printf("page config of %s is not valid JSON: %v", urlpath, err)
		return
	}

	root := dproxy.New(v)
	words, err := root.P("/mods/itemlist/data").M("query").String()
	if err != nil {
		// words stays empty, as it did when this error was ignored.
		log.Printf("/mods/itemlist/data/query path error %v", err)
	}

	minprice := 9999999.00
	m := make(map[string][]string)
	auctions := root.P("/mods/itemlist/data/auctions")
	for i := 0; ; i++ {
		set := auctions.A(i)

		// This is the only exit from the loop: it is taken as soon as entry i
		// has no readable view_sales.
		sales, err := set.M("view_sales").String()
		if err != nil {
			log.Printf("/mods/itemlist/data/auctions path error %v ", err.Error())
			break
		}
		sales = strings.Replace(sales, "人付款", "", 1)

		price, err := set.M("view_price").String()
		if err != nil {
			log.Printf("auction %d: view_price: %v", i, err)
			continue
		}
		title, err := set.M("raw_title").String()
		if err != nil {
			log.Printf("auction %d: raw_title: %v", i, err)
			continue
		}
		detailURL, err := set.M("detail_url").String()
		if err != nil {
			log.Printf("auction %d: detail_url: %v", i, err)
			continue
		}

		// Parse at 64-bit precision so prices are not rounded to float32.
		inprice, err := strconv.ParseFloat(price, 64)
		if err != nil {
			log.Printf("%v 转换有错", price)
			continue
		}
		insales, err := strconv.Atoi(sales)
		if err != nil {
			log.Printf("%v 转换有错", sales)
			continue
		}

		if minprice > inprice && inprice > 1 && insales >= 1 {
			minprice = inprice
		}

		fmt.Printf("%v===%v===%v ", title, sales, price)
		log.Printf("%v===%v===%v ", title, sales, price)
		m[detailURL] = []string{sales, price, title, detailURL}
	}

	fmt.Printf("%v ", minprice)
	buildxlsx(words, m, minprice)
}
// patherrch lists the nine characters / \ : * ? " < > | one per element.
// The backslash and double-quote entries need escaping inside an interpreted
// string literal; without the escapes the declaration does not compile.
// NOTE(review): presumably the characters to strip from generated file names —
// confirm against the code that uses this table.
var patherrch = [...]string{"/", "\\", ":", "*", "?", "\"", "<", ">", "|"}
// userAgent is the fixed pool of User-Agent header values from which
// GetRandomUserAgent picks one entry per call.
var userAgent = [...]string{"Mozilla/5.0 (compatible, MSIE 10.0, Windows NT, DigExt)",
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, 360SE)",
"Mozilla/4.0 (compatible, MSIE 8.0, Windows NT 6.0, Trident/4.0)",
"Mozilla/5.0 (compatible, MSIE 9.0, Windows NT 6.1, Trident/5.0,",
"Opera/9.80 (Windows NT 6.1, U, en) Presto/2.8.131 Version/11.11",
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, TencentTraveler 4.0)",
"Mozilla/5.0 (Windows, U, Windows NT 6.1, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Macintosh, Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
"Mozilla/5.0 (Macintosh, U, Intel Mac OS X 10_6_8, en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
"Mozilla/5.0 (Linux, U, Android 3.0, en-us, Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
"Mozilla/5.0 (iPad, U, CPU OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
"Mozilla/4.0 (compatible, MSIE 7.0, Windows NT 5.1, Trident/4.0, SE 2.X MetaSr 1.0, SE 2.X MetaSr 1.0, .NET CLR 2.0.50727, SE 2.X MetaSr 1.0)",
"Mozilla/5.0 (iPhone, U, CPU iPhone OS 4_3_3 like Mac OS X, en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8J2 Safari/6533.18.5",
"MQQBrowser/26 Mozilla/5.0 (Linux, U, Android 2.3.7, zh-cn, MB200 Build/GRJ22, CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"}
// Package-level values used by Spy and GetRandomUserAgent.
var (
	// r is the random generator GetRandomUserAgent draws from; it is seeded
	// with the wall-clock time in nanoseconds at package initialization.
	r = rand.New(rand.NewSource(time.Now().UnixNano()))

	// tburl and tburlpara are the fixed prefix and suffix Spy wraps around
	// the escaped keyword to build the search URL.
	tburl     = "https://s.taobao.com/search?q="
	tburlpara = "&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.50862.201857-taobao-item.1&ie=utf8&initiative_id=tbindexz_20170306&sort=price-asc"

	// urlChannel is a string channel with a buffer of 200, meant to hold
	// href attribute values.
	urlChannel = make(chan string, 200)

	// atagRegExp captures the text between "g_page_config = " and the next
	// "; ". MustCompile panics if the pattern does not compile.
	// NOTE(review): the trailing "; " may have been ";\n" before the file's
	// escapes were mangled — confirm against a real page.
	atagRegExp = regexp.MustCompile(`g_page_config = (.*?); `)
)
// chineseRegExp matches a string that consists of exactly one character in
// the range U+4E00..U+9FA5. The \u escapes are required: without the
// backslashes the class is made of the ASCII characters "u", "4", "e", "0",
// the range "0"-"u", and so on, and matches no Chinese text at all.
var chineseRegExp = regexp.MustCompile("^[\u4e00-\u9fa5]$")
// randMu serializes access to r. A generator created with rand.New is not
// safe for concurrent use, and GetRandomUserAgent is called from every Spy
// goroutine.
var randMu sync.Mutex

// GetRandomUserAgent returns one entry of userAgent, chosen at random with r.
// It may be called from multiple goroutines at the same time.
func GetRandomUserAgent() string {
	randMu.Lock()
	defer randMu.Unlock()
	return userAgent[r.Intn(len(userAgent))]
}