Skip to content

Commit

Permalink
Added search functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
lukecold committed Nov 21, 2015
1 parent 81f034f commit ab26e61
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 44 deletions.
39 changes: 16 additions & 23 deletions api/client.go → api/analyzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,37 +10,30 @@ import (
. "strings"
)

/*
* YouTube client.
*/
type Client struct {
VideoRepository string
}

/*
* Get a video list from given id.
*/
func (cl *Client) GetVideoListFromId(id string) (VideoList, error) {
func GetVideoListFromId(id string) (VideoList, error) {
url := "https://www.youtube.com/watch?v=" + id
return cl.GetVideoListFromUrl(url)
return GetVideoListFromUrl(url)
}

/*
* Get a video list from given url.
*/
func (cl *Client) GetVideoListFromUrl(url string) (vl VideoList, err error) {
func GetVideoListFromUrl(url string) (vl VideoList, err error) {
//Get webpage content from url
body, err := cl.GetHttpFromUrl(url)
body, err := GetHttpFromUrl(url)
if err != nil {
return
}
//Extract json data from webpage content
jsonData, err := cl.GetJsonFromHttp(body)
jsonData, err := GetJsonFromHttp(body)
if err != nil {
return
}
//Fetch video list according to json data
vl, err = cl.GetVideoListFromJson(jsonData)
vl, err = GetVideoListFromJson(jsonData)
if err != nil {
return
}
Expand All @@ -50,38 +43,38 @@ func (cl *Client) GetVideoListFromUrl(url string) (vl VideoList, err error) {
/*
* Initialize a GET request, and get the http code of the webpage.
*/
func (cl *Client) GetHttpFromUrl(url string) ([]byte, error) {
func GetHttpFromUrl(url string) (body []byte, err error) {
resp, err := http.Get(url)
if err != nil {
return nil, err
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
body, err = ioutil.ReadAll(resp.Body)
if err != nil {
return nil, err
return
}
return body, nil
return
}

/*
* Get json data from http code.
*/
func (*Client) GetJsonFromHttp(httpData []byte) (map[string]interface{}, error) {
func GetJsonFromHttp(httpData []byte) (map[string]interface{}, error) {
//Find out if this page is age-restricted
if bytes.Index(httpData, []byte("og:restrictions:age")) != -1 {
return nil, errors.New("this page is age-restricted")
}
//Find beginning of json data
var jsonBeg = "ytplayer.config = {"
jsonBeg := "ytplayer.config = {"
beg := bytes.Index(httpData, []byte(jsonBeg))
if beg == -1 { //pattern not found
return nil, PatternNotFoundError{_pattern: jsonBeg}
}
beg += len(jsonBeg) //len(jsonBeg) returns the number of bytes in jsonBeg

//Find offset of json data
var unmatchedBrackets = 1
var offset = 0
unmatchedBrackets := 1
offset := 0
for unmatchedBrackets > 0 {
nextRight := bytes.Index(httpData[beg+offset:], []byte("}"))
if nextRight == -1 {
Expand All @@ -104,7 +97,7 @@ func (*Client) GetJsonFromHttp(httpData []byte) (map[string]interface{}, error)
/*
* Get video list from json data retrieved from http code.
*/
func (*Client) GetVideoListFromJson(jsonData map[string]interface{}) (vl VideoList, err error) {
func GetVideoListFromJson(jsonData map[string]interface{}) (vl VideoList, err error) {
args := jsonData["args"].(map[string]interface{})
vl.Title = args["title"].(string)
encodedStreamMap := args["url_encoded_fmt_stream_map"].(string)
Expand Down
113 changes: 113 additions & 0 deletions api/search.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package gotube

import (
"bytes"
"errors"
"net/url"
"strconv"
. "strings"
)

/*
 * GetTopKVideoIds returns the ids of the top k videos of the search result
 * for the given keywords, in the order in which the search pages list them.
 * If k is larger than the number of search results, every result found is
 * returned. A non-positive k yields an empty list.
 *
 * The search stops early when a page contains no ids, or when a page yields
 * an id that was already collected (the site is then repeating its last page).
 * On error no ids are returned.
 */
func GetTopKVideoIds(keywords string, k int) ([]string, error) {
	//The set is only used to detect repeats; idList keeps the ranking order
	seen := make(map[string]struct{})
	var idList []string
	for pageNum := 1; len(idList) < k; pageNum++ {
		//Get url of search result from #pageNum page
		searchUrl, err := GetSearchUrl(keywords, pageNum)
		if err != nil {
			return nil, err
		}
		//Get list of video id from current page
		idListOfPage, err := GetVideoIdsFromPage(searchUrl)
		if err != nil {
			return nil, err
		}
		//An empty page means there is nothing left; without this check the
		//loop would request further pages forever
		if len(idListOfPage) == 0 {
			return idList, nil
		}
		//Add ids of the current page until we have k of them or run out of results
		for _, id := range idListOfPage {
			if _, repeated := seen[id]; repeated {
				//We have run out of search results, it's repeating the last page
				return idList, nil
			}
			seen[id] = struct{}{}
			idList = append(idList, id)
			if len(idList) == k {
				break
			}
		}
	}
	return idList, nil
}

/*
 * GetSearchUrl builds the YouTube search url for the provided keywords.
 *
 * keywords is split on whitespace; every term is query-escaped on its own and
 * the terms are joined with '+', like what the YouTube search does.
 * pageNum selects the result page and must be at least 1; page 1 is the
 * default page and adds no parameter, later pages add "&page=N".
 *
 * Returns an "invalid page number" error (and no url) when pageNum < 1.
 */
func GetSearchUrl(keywords string, pageNum int) (searchUrl string, err error) {
	//Make sure page number is valid before building anything
	if pageNum < 1 {
		return "", errors.New("invalid page number")
	}
	//Escape each term separately and only then insert the '+' separators.
	//Replacing ' ' with '+' before escaping would turn every separator into
	//"%2B", i.e. a search for a literal plus sign.
	terms := Fields(keywords)
	for i, term := range terms {
		terms[i] = url.QueryEscape(term)
	}
	searchUrl = "https://www.youtube.com/results?search_query=" + Join(terms, "+")
	if pageNum > 1 {
		searchUrl += "&page=" + strconv.Itoa(pageNum)
	}
	return searchUrl, nil
}

/*
 * GetVideoIdsFromPage fetches the page at searchUrl and retrieves the list of
 * video ids found in it, in page order.
 *
 * Returns the error of the http request if the page cannot be fetched, or an
 * "unmatched parenthesis" error if an id attribute is not terminated by a
 * closing double quote (the ids found before that point are still returned).
 */
func GetVideoIdsFromPage(searchUrl string) (idList []string, err error) {
	//Get the http code of the page get from url
	body, err := GetHttpFromUrl(searchUrl)
	if err != nil {
		return nil, err
	}
	return parseVideoIdsFromPage(body)
}

/*
 * parseVideoIdsFromPage scans the http code of a search result page and
 * collects the value of every data-context-item-id attribute that follows
 * the video tile class list.
 */
func parseVideoIdsFromPage(body []byte) ([]string, error) {
	//Every video tile starts with this exact attribute sequence; the id is
	//the text between it and the next double quote
	idBeg := []byte("class=\"yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile\" data-context-item-id=\"")
	var idList []string
	for rest := body; ; {
		//Find the index of begin pattern
		offset := bytes.Index(rest, idBeg)
		if offset < 0 {
			//No more tiles on this page
			return idList, nil
		}
		rest = rest[offset+len(idBeg):]
		//Find the index of the closing double quote of the attribute
		end := bytes.IndexByte(rest, '"')
		if end < 0 {
			return idList, errors.New("unmatched parenthesis")
		}
		idList = append(idList, string(rest[:end]))
	}
}

/*
 * MapToArray appends every key of m to the slice a points to.
 * Keys are appended regardless of the value stored under them, and in the
 * (unspecified) iteration order of the map.
 */
func MapToArray(m map[string]bool, a *[]string) {
	keys := *a
	for k := range m {
		keys = append(keys, k)
	}
	*a = keys
}
20 changes: 9 additions & 11 deletions api/video.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,19 +47,17 @@ func (video *Video) FindMissingFields() (missingFields []string) {
* Download this video into the repository,
* if repository is not generated, download to current folder.
*/
func (video *Video) Download(cl Client) error {
func (video *Video) Download(rep string) error {
//Get video from url
body, err := cl.GetHttpFromUrl(video.url)
body, err := GetHttpFromUrl(video.url)
if err != nil {
return err
}
var pathname string
if cl.VideoRepository != "" {
if rep != "" {
//Make a directory and give every user highest permission
os.MkdirAll(cl.VideoRepository, 0777)
pathname = cl.VideoRepository
if !HasSuffix(pathname, "/") {
pathname += "/"
os.MkdirAll(rep, 0777)
if !HasSuffix(rep, "/") {
rep += "/"
}
}

Expand All @@ -75,7 +73,7 @@ func (video *Video) Download(cl Client) error {
}
return r
}, filename)
filename = pathname + filename
filename = rep + filename
file, err := os.Create(filename)
if err != nil {
return err
Expand All @@ -100,12 +98,12 @@ func (vl *VideoList) Append(v Video) {
* Filter the list first by the given key words,
* then download the first video in the list
*/
func (vl *VideoList) Download(cl Client, quality, extension string) (err error) {
func (vl *VideoList) Download(rep string, quality, extension string) (err error) {
vl.Filter(quality, extension)

//No matter how many left, pick the first one
video := vl.Videos[0]
err = video.Download(cl)
err = video.Download(rep)
return err
}

Expand Down
37 changes: 27 additions & 10 deletions main.go → script.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ func main() {
isRetList := flag.Bool("l", false, "use this flag to retrieve video list")
url := flag.String("url", "", "video url")
id := flag.String("id", "", "video id")
rep := flag.String("-VideoRepository", "", "(optional) repository to store videos")
search := flag.String("-search", "", "search by key words")
flag.StringVar(search, "s", "", "search by key words")
rep := flag.String("-videorepository", "", "(optional) repository to store videos")
flag.StringVar(rep, "rep", "", "(optional) repository to store videos")
quality := flag.String("-quality", "", "(optional) video quality. e.g. medium")
flag.StringVar(quality, "q", "", "(optional) video quality. e.g. medium")
Expand All @@ -35,11 +37,22 @@ func main() {
fmt.Println("Please choose if you want to download or retrieve video list.")
invalidCommand = true
}
if *url == "" && *id == "" {
fmt.Println("Please specify either url and id.")
//Find out how many sources are specified
sourceNum := 0
if *url != "" {
sourceNum++
}
if *id != "" {
sourceNum++
}
if *search != "" {
sourceNum++
}
if sourceNum == 0 {
fmt.Println("Please specify one of url, id, and key word(s).")
invalidCommand = true
} else if *url != "" && *id != "" {
fmt.Println("Please don't specify both url and id.")
} else if sourceNum > 1 {
fmt.Println("Please don't specify more than one of url, id, and key word(s).")
invalidCommand = true
}
if invalidCommand {
Expand All @@ -48,22 +61,26 @@ func main() {
return
}

//Initialize a client
cl := Client{VideoRepository: *rep}
//Get the video list
var vl VideoList
var err error
if *url != "" {
vl, err = cl.GetVideoListFromUrl(*url)
vl, err = GetVideoListFromUrl(*url)
} else if *id != "" {
vl, err = GetVideoListFromId(*id)
} else {
vl, err = cl.GetVideoListFromId(*id)
ids, err := GetTopKVideoIds(*search, 1)
if err != nil {
log.Fatal(err)
}
vl, err = GetVideoListFromId(ids[0])
}
if err != nil {
log.Fatal(err)
}
//Choose either downloading or retrieving video list
if *isDownload {
err = vl.Download(cl, *quality, *extension)
err = vl.Download(*rep, *quality, *extension)
if err != nil {
log.Fatal(err)
}
Expand Down

0 comments on commit ab26e61

Please sign in to comment.