闲来无事,最近学了 Golang,并且写了一个爬虫,用来抓取别人网站泛域名(随机子域名)下页面的标题,在此记录一下。功能有:随机 UA(User-Agent);没有做代理 IP;自动创建 baidu.txt,并把爬到的标题自动保存到 baidu.txt。
闲来无事,最近学了 Golang,并且写了一个爬虫,用来抓取别人网站泛域名(随机子域名)下页面的标题,在此记录一下。功能有:随机 UA(User-Agent);没有做代理 IP;自动创建 baidu.txt,并把爬到的标题自动保存到 baidu.txt。
// Command main fetches pages from randomly generated subdomains of
// wholdlj.cn and appends every <title> it finds to baidu.txt.
package main

import (
	"fmt"
	"io/ioutil"
	"math/rand"
	"net/http"
	"os"
	"regexp"
	"time"
)

// The declarations below are not referenced by any function in this file.
// They are kept so that code elsewhere in the package that may rely on them
// keeps compiling.
var workerCount = 0
var maxworKerCount = 32
var searchRequest = make(chan string)
var workerDone = make(chan bool)
var foundMatch = make(chan bool)
var cookie string = "in_userid=1; in_username=xxxxxqq.com; in_userpassword=xxxxx; in_adminid=3; in_adminname=xxxxxqq.com; in_adminpassword=xxxxx; in_permission=1%2C2%2C3%2C4%2C5%2C6"

// userAgentList holds the User-Agent strings GetRandomUserAgent chooses from.
// The list previously repeated this exact string 13 times; duplicates do not
// change what is sent, so only one copy is kept. Add distinct strings here to
// get real rotation.
var userAgentList = []string{
	"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36 SE 2.X MetaSr 1.0",
}

// outputFile is the file the collected titles are appended to.
const outputFile = "baidu.txt"

// titlePattern captures the text between <title> and </title>. It is compiled
// once here instead of on every call to Initialize.
var titlePattern = regexp.MustCompile(`<title>(.*?)</title>`)

// httpClient is shared by all requests so connections can be reused. The
// timeout keeps an unresponsive host from stalling the crawl forever.
var httpClient = &http.Client{Timeout: 10 * time.Second}

// rng is the single random source for this program, seeded once at start-up.
// A *rand.Rand is not safe for concurrent use; guard it with a mutex if the
// crawl is ever spread across goroutines.
var rng = rand.New(rand.NewSource(time.Now().UnixNano()))

// GetRandomUserAgent returns one entry of userAgentList chosen at random.
func GetRandomUserAgent() string {
	return userAgentList[rng.Intn(len(userAgentList))]
}

// getUrlRespHtml requests a random five-letter subdomain of wholdlj.cn and
// returns the response body as a string. Any failure is reported on stdout
// and yields the empty string, so the caller simply finds no titles.
func getUrlRespHtml() string {
	url := "http://" + randomString(5) + ".wholdlj.cn/"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Println("获取地址错误", err)
		return ""
	}
	req.Header.Add("User-Agent", GetRandomUserAgent())

	resp, err := httpClient.Do(req)
	if err != nil {
		// resp is nil here; returning avoids the nil dereference the
		// previous version hit whenever a subdomain did not resolve.
		fmt.Println("获取地址错误", err)
		return ""
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("读取响应错误", err)
		return ""
	}
	return string(body)
}

// Initialize performs one crawl step: it fetches a random page, extracts all
// <title> contents, appends each one to baidu.txt followed by "\r\n" and
// echoes it to stdout.
func Initialize() {
	matches := titlePattern.FindAllStringSubmatch(getUrlRespHtml(), -1)
	if len(matches) == 0 {
		return
	}

	// O_WRONLY is required for the writes to succeed on Unix-like systems;
	// O_APPEND on its own opens the file read-only there.
	fo, err := os.OpenFile(outputFile, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644)
	if err != nil {
		fmt.Println("os.OpenFile err", err)
		return
	}
	// Close on every call: this function runs tens of thousands of times and
	// would otherwise exhaust the process's file descriptors.
	defer fo.Close()

	for _, m := range matches {
		if _, err := fo.WriteString(m[1] + "\r\n"); err != nil {
			fmt.Println("fo.Write err", err)
			return
		}
		fmt.Printf("title:%s\r\n", m[1])
	}
}

// randomInt returns an int >= lo and < hi. It panics if hi <= lo, because
// rand.Intn requires a positive argument.
func randomInt(lo, hi int) int {
	return lo + rng.Intn(hi-lo)
}

// randomString generates a random string of n characters drawn from A-Z.
func randomString(n int) string {
	buf := make([]byte, n)
	for i := range buf {
		// randomInt's upper bound is exclusive, so 'Z'+1 is needed for
		// 'Z' itself to be reachable.
		buf[i] = byte(randomInt('A', 'Z'+1))
	}
	return string(buf)
}

// mains runs the crawl step 50000 times, one after another.
func mains() {
	for i := 0; i < 50000; i++ {
		Initialize()
	}
}

// main creates (or truncates) baidu.txt, runs the crawl and prints how long
// it took.
func main() {
	start := time.Now()

	fc, err := os.Create(outputFile)
	if err != nil {
		fmt.Println("os.Create err", err)
		return
	}
	fc.Close()

	mains()
	fmt.Println(time.Since(start))
}
转载请注明来自本站(66娱乐网)
66优乐网 » golang第一个爬虫实战
66优乐网 » golang第一个爬虫实战