Web scraping - How to have Go find packages online?


I'm trying to run a Go program using LiteIDE X22, but I get this message:

c:/go/bin/go.exe build [c:/users/admins/desktop/desktp/worm_scraper-master]
worm_scraper.go:11:2: cannot find package "github.com/codegangsta/cli" in any of:
    c:\go\src\pkg\github.com\codegangsta\cli (from $GOROOT)
    c:\users\admins\gostuff\src\github.com\codegangsta\cli (from $GOPATH)
worm_scraper.go:12:2: cannot find package "github.com/puerkitobio/goquery" in any of:
    c:\go\src\pkg\github.com\puerkitobio\goquery (from $GOROOT)
    c:\users\admins\gostuff\src\github.com\puerkitobio\goquery (from $GOPATH)
Error: process exited with code 1.

I think that means it's looking on my hard drive instead of online, right? (BTW, I'm pretty clueless about programming and am just trying to run something someone else wrote.) How do I get it to access the web? Here's the full code:

package main  import (     "errors"     "fmt"     "os"     "os/exec"     "regexp"     "strings"      "github.com/codegangsta/cli"     "github.com/puerkitobio/goquery" )  const (     mainsite        = "https://parahumans.wordpress.com/"     tableofcontents = "https://parahumans.wordpress.com/table-of-contents/" )  type arc struct {     identifier string     title      string     chapters   []chapter }  type chapter struct {     title      string     url        string     tags       []string     paragraphs []paragraph     retries    int     dateposted string }  type paragraph string  // format paragraph func (p *paragraph) format() {     s := string(*p)      // handle emphasis     s = strings.replace(s, "<em>", "*", -1)     s = strings.replace(s, "</em>", "*", -1)     s = strings.replace(s, "<i>", "*", -1)     s = strings.replace(s, "</i>", "*", -1)      // handle bold     s = strings.replace(s, "<strong>", "**", -1)     s = strings.replace(s, "</strong>", "**", -1)     s = strings.replace(s, "<b>", "**", -1)     s = strings.replace(s, "</b>", "**", -1)      // remove new lines     s = strings.replace(s, "\n", "", -1)      // , random double spaces     s = strings.replace(s, ".  ", ". 
", -1)      *p = paragraph(s) }  // return arc given chapter belongs func (ch *chapter) whicharc(arclist []*arc) (*arc, error) {     _, arc := range arclist {         if strings.replace(ch.title[:2], ".", "", -1) == arc.identifier {             return arc, nil         }     }     return &arc{}, errors.new("chapter '" + ch.title + "' did not match arcs") }  // parse chapter , return func (ch *chapter) parse(done chan bool) {     if ch.retries > 3 {         panic("chapter url '" + ch.url + "' has timed out many times")     }     // chapter     if strings.hasprefix(ch.url, "http") == false {         // make sure begins http goquery can use         ch.url = "https://" + ch.url     }     doc, err := goquery.newdocument(ch.url)     if err != nil {         // try again         ch.retries++         go ch.parse(done)         return     }      // set new chapter title     ch.title = doc.find("h1.entry-title").text()      // set tags     doc.find(".entry-meta a[rel=tag]").each(func(_ int, s *goquery.selection) {         ch.tags = append(ch.tags, s.text())         if len(ch.tags) == 0 {             ch.tags = append(ch.tags, "none")         }     })      // date posted     ch.dateposted = doc.find("time.entry-date").text()      // we'll paragraphs     doc.find(".entry-content > p").each(func(_ int, s *goquery.selection) {         // check previous/next links         if len(s.find("a").nodes) > 0 {             return         }          // paragraph html         st, _ := s.html()         para := paragraph("")          // actual paragraph         if val, exists := s.attr("padding-left"); exists && val == "30px" {             // check see if paragraph special (indented) block             para = paragraph("    " + st)         } else if val, exists := s.attr("text-align"); exists && val == "center" {             // otherwise check see if it's separator paragraph             para = paragraph("----------")         } else {             // it's normal paragraph in case             para = 
paragraph(st)         }          // , add paragraph chapter         para.format()         ch.paragraphs = append(ch.paragraphs, para)     })      // finally, let's signal success     done <- true }  // return slice of arcs extracted table of contents func parsearcs(s string) []*arc {     arcs := []*arc{}     r, _ := regexp.compile(`[0-9]+`)     _, line := range strings.split(s, "\n") {         line = strings.trimspace(line)         if strings.hasprefix(line, "arc") {             arcs = append(arcs, &arc{                 identifier: r.findstring(line),                 title:      line,             })         } else if strings.hasprefix(line, "epilogue") {             arcs = append(arcs, &arc{                 identifier: "e",                 title:      line,             })         }     }     return arcs }  func main() {     // define app     app := cli.newapp()     app.name = "worm scraper"     app.usage = "a tool let updated epub copy of serial web novel worm, wildbow"     app.version = "1.0"     app.author = "benjamin harris"      // define application flags     app.flags = []cli.flag{         cli.boolflag{"pdf", "save book pdf instead of epub, if possible"},         cli.boolflag{"with-link", "include link chapter online"},         cli.boolflag{"with-tags", "include tags each chapter posted under"},         cli.boolflag{"with-date", "include date each chapter posted"},     }      // heart of application     app.action = func(context *cli.context) {         // starting program         fmt.println("starting scrape worm")          // list of arcs table of contents         fmt.println("gathering links table of contents...")         contents, err := goquery.newdocument(tableofcontents)         if err != nil {             panic("failed table of contents! 
" + err.error())         }          // parse arcs         arcs := parsearcs(contents.find(".entry-content").text())          // links arc chapters         contents.find(".entry-content a:not([class*=share-icon])").each(func(_ int, s *goquery.selection) {             ch := chapter{}             ch.title = strings.replace(strings.trimspace(s.text()), "\n", "", -1)             ch.url, _ = s.attr("href")              if ch.title == "" {                 return             }              arc, _ := ch.whicharc(arcs)             arc.chapters = append(arc.chapters, ch)         })          // manually add missing chapter in epilogue         c := chapter{             title: "e.2",             url:   "https://parahumans.wordpress.com/2013/11/05/teneral-e-2/",         }         a, _ := c.whicharc(arcs)         a.chapters = append(a.chapters, c)         copy(a.chapters[1+1:], a.chapters[1:])         a.chapters[1] = c          // start getting chapters         chapters := 0         done := make(chan bool)         _, arc := range arcs {             i, _ := range arc.chapters {                 chapters++                 go arc.chapters[i].parse(done)             }         }          fmt.println("starting parse", chapters, "chapters")         fmt.print("finished: ")          totalchapters := chapters         {             select {             case <-done:                 chapters--                 fmt.print(totalchapters-chapters, ",")             }             if chapters == 0 {                 // we're done chapters                 close(done)                 fmt.println()                 break             }         }          // , let's write stuff file         fmt.println("saving results file...")         f, err := os.openfile("worm.md", os.o_rdwr|os.o_create|os.o_excl, 0666)         if err != nil {             panic(err)         }         defer f.close()          // define pagebreak         pagebreak := "\n\n"          // write cover         f.writestring("# worm\n\n")         
f.writestring("by wildbow\n\n")         f.writestring("website: " + mainsite)          // loop through arcs         _, arc := range arcs {             f.writestring(pagebreak + "# " + arc.title)             _, chapter := range arc.chapters {                 f.writestring("\n\n")                 f.writestring("## " + chapter.title + "\n\n")                 if context.bool("with-tags") {                     f.writestring("**tags:** " + strings.join(chapter.tags, ", ") + "  ")                 }                 if context.bool("with-date") {                     f.writestring("**date:** " + chapter.dateposted + "  ")                 }                 if context.bool("with-link") {                     f.writestring("**link:** " + chapter.url + "  ")                 }                 f.writestring("\n\n")                  // save chapter's paragraphs                 _, p := range chapter.paragraphs {                     f.writestring(string(p) + "\n\n")                 }             }         }          // let's try convert markdown file ebook format (epub, pdf)         fmt.print("attempting convert markdown file... ")         cmdtext := []string{"-s", "worm.md", "--epub-chapter-level", "2", "-o", "worm.epub"}         if context.bool("pdf") {             cmdtext = []string{"worm.md", "-o", "worm.pdf"}             pagebreak = `<div style="page-break-after: always;"></div>`         }         cmd := exec.command("pandoc", cmdtext...)         err = cmd.run()         if err != nil {             fmt.println("conversion failed! make sure you've installed pandoc (http://johnmacfarlane.net/pandoc/installing.html) if want convert generated markdown file ebook compatible format. in meantime, we've left markdown file.")         } else {             _ = os.remove("worm.md")             fmt.println("completed!")         }     }      // run application     app.run(os.args) } 

Oh, and is it possible to modify it to output .txt or .mobi? If not, I'll convert it using Calibre. Thanks in advance. Oh, and if it matters, I'm using Windows 7 64-bit.

The Go compiler doesn't import libraries directly from the internet, but it does know how to fetch them for you. When you import github.com/codegangsta/cli, it doesn't look for the package at that URL; instead, it looks for it in your GOPATH/src folder.

The `go get` command can fetch a library from its URL and download it into your GOPATH.

If you have set up your GOPATH (if not, read "How to Write Go Code"), then before running your code, run the command `go get <library>` and the go tool will download it for you. In your example, you should run the following commands:

go get github.com/codegangsta/cli
go get github.com/PuerkitoBio/goquery

That will download the libraries into GOPATH/src/github.com/codegangsta/cli and GOPATH/src/github.com/PuerkitoBio/goquery, respectively.


Comments

Popular posts from this blog

javascript - RequestAnimationFrame not working when exiting fullscreen switching space on Safari -

Python ctypes access violation with const pointer arguments -