YTSFlix_Go/vendor/github.com/rylio/ytdl/video_info.go

403 lines
11 KiB
Go
Raw Normal View History

2018-11-04 14:58:15 +00:00
package ytdl
import (
"bytes"
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"io/ioutil"
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
log "github.com/sirupsen/logrus"
)
// Endpoints and layouts used when talking to YouTube.
const (
	// youtubeBaseURL is the watch page; the video id is sent as the "v"
	// query parameter.
	youtubeBaseURL = "https://www.youtube.com/watch"
	// youtubeEmbededBaseURL is the embed page prefix; the video id is
	// appended to it.
	youtubeEmbededBaseURL = "https://www.youtube.com/embed/"
	// youtubeVideoEURL is the prefix of the "eurl" parameter sent to the
	// video info endpoint; the video id is appended to it.
	youtubeVideoEURL = "https://youtube.googleapis.com/v/"
	// youtubeVideoInfoURL is the get_video_info endpoint.
	youtubeVideoInfoURL = "https://www.youtube.com/get_video_info"
	// youtubeDateFormat is the time layout of the datePublished meta tag.
	youtubeDateFormat = "2006-01-02"
)
// VideoInfo contains the info of a youtube video.
type VideoInfo struct {
// ID is the video ID.
ID string `json:"id"`
// Title is the video title.
Title string `json:"title"`
// Description is the video description.
Description string `json:"description"`
// DatePublished is the date the video was published; it stays the zero
// time when the date could not be extracted.
DatePublished time.Time `json:"datePublished"`
// Formats lists the formats the video is available in.
Formats FormatList `json:"formats"`
// Keywords is the list of keywords associated with the video.
Keywords []string `json:"keywords"`
// Author is the author of the video.
Author string `json:"author"`
// Duration is the length of the video. It carries no json tag, so
// encoding/json uses the field name "Duration" as its key.
Duration time.Duration
// htmlPlayerFile is the "js" entry of the player config's "assets"
// object; GetDownloadURL passes it to getDownloadURL.
htmlPlayerFile string
}
// GetVideoInfo fetches video info from an identifier, which may be:
//
//   - a *url.URL pointing at a watch page or a youtu.be short link,
//   - a string holding such a url, or a bare video id,
//   - a []byte, which is treated exactly like the equivalent string.
//
// A string that does not parse as a request URI is assumed to be a video id.
// Any other identifier type, or a nil *url.URL, yields an error.
func GetVideoInfo(value interface{}) (*VideoInfo, error) {
	switch t := value.(type) {
	case *url.URL:
		// A typed nil pointer still matches this case; reject it here
		// instead of dereferencing it.
		if t == nil {
			return nil, fmt.Errorf("Invalid youtube url, no video id")
		}
		// Short links carry the id in the path, not in the "v" parameter.
		if t.Host == "youtu.be" {
			return GetVideoInfoFromShortURL(t)
		}
		return GetVideoInfoFromURL(t)
	case []byte:
		return GetVideoInfo(string(t))
	case string:
		u, err := url.ParseRequestURI(t)
		if err != nil {
			// Not a url, so treat the whole string as a video id.
			return GetVideoInfoFromID(t)
		}
		if u.Host == "youtu.be" {
			return GetVideoInfoFromShortURL(u)
		}
		return GetVideoInfoFromURL(u)
	default:
		return nil, fmt.Errorf("Identifier type must be a string, *url.URL, or []byte")
	}
}
// GetVideoInfoFromURL fetches video info from a youtube url. The video id is
// read from the url's "v" query parameter; an error is returned when u is nil
// or the parameter is missing or empty.
func GetVideoInfoFromURL(u *url.URL) (*VideoInfo, error) {
	// Guard against a nil url, which would otherwise panic in u.Query().
	if u == nil {
		return nil, fmt.Errorf("Invalid youtube url, no video id")
	}
	videoID := u.Query().Get("v")
	if len(videoID) == 0 {
		return nil, fmt.Errorf("Invalid youtube url, no video id")
	}
	return GetVideoInfoFromID(videoID)
}
// GetVideoInfoFromShortURL fetches video info from a short youtube url, whose
// path holds the video id (for example https://youtu.be/<id>). An error is
// returned when u is nil or its path contains no id.
func GetVideoInfoFromShortURL(u *url.URL) (*VideoInfo, error) {
	if u == nil {
		return nil, errors.New("Could not parse short URL")
	}
	// Strip the surrounding slashes instead of blindly dropping the first
	// byte: this keeps the id intact when the path has no leading slash and
	// tolerates a trailing slash.
	if id := strings.Trim(u.Path, "/"); id != "" {
		return GetVideoInfoFromID(id)
	}
	return nil, errors.New("Could not parse short URL")
}
// GetVideoInfoFromID downloads the watch page of the video with the given id
// and extracts its info from the returned HTML. Any response status other
// than 200 is reported as an error.
func GetVideoInfoFromID(id string) (*VideoInfo, error) {
	// The watch page takes the video id in the "v" query parameter.
	params := url.Values{}
	params.Set("v", id)
	watchURL := youtubeBaseURL + "?" + params.Encode()

	resp, err := http.Get(watchURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Invalid status code: %d", resp.StatusCode)
	}

	page, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, err
	}
	return getVideoInfoFromHTML(id, page)
}
// GetDownloadURL resolves the download url of the given format by handing
// the format and this video's player file to getDownloadURL.
func (info *VideoInfo) GetDownloadURL(format Format) (*url.URL, error) {
	playerFile := info.htmlPlayerFile
	return getDownloadURL(format, playerFile)
}
// GetThumbnailURL builds the url of the video's thumbnail image in the
// requested quality. The parse error is discarded, so the result is nil if
// the assembled string is not a valid url.
func (info *VideoInfo) GetThumbnailURL(quality ThumbnailQuality) *url.URL {
	raw := fmt.Sprintf("http://img.youtube.com/vi/%s/%s.jpg", info.ID, quality)
	thumbnail, _ := url.Parse(raw)
	return thumbnail
}
// Download resolves the download url of format, requests it and streams the
// response body into dest. A response status outside the 2xx range is
// reported as an error and nothing is written.
func (info *VideoInfo) Download(format Format, dest io.Writer) error {
	target, err := info.GetDownloadURL(format)
	if err != nil {
		return err
	}

	resp, err := http.Get(target.String())
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code < http.StatusOK || code >= http.StatusMultipleChoices {
		return fmt.Errorf("Invalid status code: %d", code)
	}

	if _, err := io.Copy(dest, resp.Body); err != nil {
		return err
	}
	return nil
}
// getVideoInfoFromHTML builds a VideoInfo for the video id from the HTML of
// its watch page.
//
// Title, description and publication date are scraped from the page itself.
// The remaining fields come from the player configuration: first the
// "ytplayer.config" JSON embedded in html and, when that is absent, the
// PLAYER_CONFIG of the embed page combined with the get_video_info response
// (two extra HTTP requests). A missing author, duration or date is only
// logged at debug level; the corresponding field keeps its zero value.
//
// An error is returned when the page cannot be parsed, a fallback request
// fails, the player config lacks its "args" object or its "assets.js" player
// file, YouTube reports status "fail", or the DASH manifest referenced by
// "dashmpd" cannot be deciphered or fetched.
func getVideoInfoFromHTML(id string, html []byte) (*VideoInfo, error) {
	doc, err := goquery.NewDocumentFromReader(bytes.NewReader(html))
	if err != nil {
		return nil, err
	}

	info := &VideoInfo{}

	// extract description and title
	info.Description = strings.TrimSpace(doc.Find("#eow-description").Text())
	info.Title = strings.TrimSpace(doc.Find("#eow-title").Text())
	info.ID = id

	dateStr, ok := doc.Find("meta[itemprop=\"datePublished\"]").Attr("content")
	if !ok {
		log.Debug("Unable to extract date published")
	} else {
		date, err := time.Parse(youtubeDateFormat, dateStr)
		if err == nil {
			info.DatePublished = date
		} else {
			log.Debug("Unable to parse date published", err.Error())
		}
	}

	// match json in javascript
	re := regexp.MustCompile("ytplayer.config = (.*?);ytplayer.load")
	var jsonConfig map[string]interface{}
	if matches := re.FindSubmatch(html); len(matches) > 1 {
		err = json.Unmarshal(matches[1], &jsonConfig)
		if err != nil {
			return nil, err
		}
	} else {
		log.Debug("Unable to extract json from default url, trying embedded url")
		var resp *http.Response
		resp, err = http.Get(youtubeEmbededBaseURL + id)
		if err != nil {
			return nil, err
		}
		defer resp.Body.Close()
		if resp.StatusCode != 200 {
			return nil, fmt.Errorf("Embeded url request returned status code %d ", resp.StatusCode)
		}
		embedHTML, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return nil, err
		}

		re = regexp.MustCompile("yt.setConfig\\('PLAYER_CONFIG', (.*?)\\);</script>")
		embedMatches := re.FindSubmatch(embedHTML)
		if len(embedMatches) < 2 {
			return nil, fmt.Errorf("Error extracting sts from embedded url response")
		}
		dec := json.NewDecoder(bytes.NewBuffer(embedMatches[1]))
		err = dec.Decode(&jsonConfig)
		if err != nil {
			return nil, fmt.Errorf("Unable to extract json from embedded url: %s", err.Error())
		}
		// Decoding a JSON null leaves the map nil; allocate it so that the
		// "args" assignment below cannot panic.
		if jsonConfig == nil {
			jsonConfig = make(map[string]interface{})
		}

		query := url.Values{
			"video_id": []string{id},
			"eurl":     []string{youtubeVideoEURL + id},
		}
		// "sts" is only forwarded when the embed config provides it as a
		// number; an unchecked assertion here would panic when it is absent.
		if sts, ok := jsonConfig["sts"].(float64); ok {
			query.Set("sts", strconv.Itoa(int(sts)))
		}

		resp, err = http.Get(youtubeVideoInfoURL + "?" + query.Encode())
		if err != nil {
			return nil, fmt.Errorf("Error fetching video info: %s", err.Error())
		}
		defer resp.Body.Close()
		if resp.StatusCode != 200 {
			return nil, fmt.Errorf("Video info response invalid status code")
		}
		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			return nil, fmt.Errorf("Unable to read video info response body: %s", err.Error())
		}
		query, err = url.ParseQuery(string(body))
		if err != nil {
			return nil, fmt.Errorf("Unable to parse video info data: %s", err.Error())
		}
		// The video info response is url-encoded; keep the first value of
		// every key so it has the same shape as the "args" object of the
		// watch page config.
		args := make(map[string]interface{}, len(query))
		for k, v := range query {
			if len(v) > 0 {
				args[k] = v[0]
			}
		}
		jsonConfig["args"] = args
	}

	inf, ok := jsonConfig["args"].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("Unable to extract video args from player config")
	}
	if status, ok := inf["status"].(string); ok && status == "fail" {
		// errorcode is a string in the url-encoded response, so it is
		// formatted with %v rather than %d.
		return nil, fmt.Errorf("Error %v:%v", inf["errorcode"], inf["reason"])
	}

	if a, ok := inf["author"].(string); ok {
		info.Author = a
	} else {
		log.Debug("Unable to extract author")
	}

	if length, ok := inf["length_seconds"].(string); ok {
		if duration, err := strconv.ParseInt(length, 10, 64); err == nil {
			info.Duration = time.Second * time.Duration(duration)
		} else {
			log.Debug("Unable to parse duration string: ", length)
		}
	} else {
		log.Debug("Unable to extract duration")
	}

	// parseKey splits the comma separated string stored under key and drops
	// empty entries. It returns nil when the key is missing or not a string,
	// and an empty, non-nil slice when the key is present but has no entries.
	parseKey := func(key string) []string {
		val, ok := inf[key].(string)
		if !ok {
			return nil
		}
		vals := []string{}
		for _, v := range strings.Split(val, ",") {
			if v != "" {
				vals = append(vals, v)
			}
		}
		return vals
	}
	info.Keywords = parseKey("keywords")

	// The player file is needed to resolve download urls, so a config
	// without it is reported as an error instead of panicking on the type
	// assertion.
	assets, ok := jsonConfig["assets"].(map[string]interface{})
	if !ok {
		return nil, fmt.Errorf("Unable to extract assets from player config")
	}
	playerFile, ok := assets["js"].(string)
	if !ok {
		return nil, fmt.Errorf("Unable to extract html player file from player config")
	}
	info.htmlPlayerFile = playerFile

	var formatStrings []string
	if fmtStreamMap, ok := inf["url_encoded_fmt_stream_map"].(string); ok {
		formatStrings = append(formatStrings, strings.Split(fmtStreamMap, ",")...)
	}
	if adaptiveFormats, ok := inf["adaptive_fmts"].(string); ok {
		formatStrings = append(formatStrings, strings.Split(adaptiveFormats, ",")...)
	}

	var formats FormatList
	for _, v := range formatStrings {
		query, err := url.ParseQuery(v)
		if err != nil {
			log.Debug("Unable to format string", err.Error())
			continue
		}
		// A missing or malformed itag becomes 0 and is then handled by the
		// newFormat lookup below, so the Atoi error needs no handling here.
		itag, _ := strconv.Atoi(query.Get("itag"))
		format, ok := newFormat(itag)
		if !ok {
			log.Debug("No metadata found for itag: ", itag, ", skipping...")
			continue
		}
		if strings.HasPrefix(query.Get("conn"), "rtmp") {
			format.meta["rtmp"] = true
		}
		// Single-valued keys are stored as a plain string, all others as
		// the full value slice.
		for k, v := range query {
			if len(v) == 1 {
				format.meta[k] = v[0]
			} else {
				format.meta[k] = v
			}
		}
		formats = append(formats, format)
	}

	if dashManifestURL, ok := inf["dashmpd"].(string); ok {
		tokens, err := getSigTokens(info.htmlPlayerFile)
		if err != nil {
			return nil, fmt.Errorf("Unable to extract signature tokens: %s", err.Error())
		}
		// Replace every "/s/<signature>" path segment of the manifest url
		// with "/signature/<deciphered signature>".
		regex := regexp.MustCompile("\\/s\\/([a-fA-F0-9\\.]+)")
		regexSub := regexp.MustCompile("([a-fA-F0-9\\.]+)")
		dashManifestURL = regex.ReplaceAllStringFunc(dashManifestURL, func(str string) string {
			return "/signature/" + decipherTokens(tokens, regexSub.FindString(str))
		})
		dashFormats, err := getDashManifest(dashManifestURL)
		if err != nil {
			return nil, fmt.Errorf("Unable to extract dash manifest: %s", err.Error())
		}

		// A manifest entry replaces an already known format with the same
		// itag; unknown itags are appended.
		for _, dashFormat := range dashFormats {
			added := false
			for j, format := range formats {
				if dashFormat.Itag == format.Itag {
					formats[j] = dashFormat
					added = true
					break
				}
			}
			if !added {
				formats = append(formats, dashFormat)
			}
		}
	}

	info.Formats = formats
	return info, nil
}
// representation holds the parts of a DASH manifest <Representation> element
// that getDashManifest decodes.
type representation struct {
// Itag is read from the element's "id" attribute; getDashManifest passes
// it to newFormat.
Itag int `xml:"id,attr"`
// Height is read from the element's "height" attribute; getDashManifest
// treats 0 as "no resolution".
Height int `xml:"height,attr"`
// URL is the content of the element's <BaseURL> child.
URL string `xml:"BaseURL"`
}
// getDashManifest downloads the DASH manifest at urlString and returns one
// Format per <Representation> element whose itag is known to newFormat.
// Each returned format carries the representation's BaseURL under the "url"
// meta key and a resolution of "<height>p", or "" when the height is 0.
// Representations with unknown itags are skipped with a debug log entry.
// A response status other than 200, or any read/decode error other than
// io.EOF, is returned as an error.
func getDashManifest(urlString string) (formats []Format, err error) {
	resp, err := http.Get(urlString)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Invalid status code %d", resp.StatusCode)
	}

	decoder := xml.NewDecoder(resp.Body)
	for {
		var tok xml.Token
		tok, err = decoder.Token()
		if err != nil {
			break
		}

		// Only <Representation> start tags are of interest.
		start, isStart := tok.(xml.StartElement)
		if !isStart || start.Name.Local != "Representation" {
			continue
		}

		var rep representation
		if err = decoder.DecodeElement(&rep, &start); err != nil {
			break
		}

		format, known := newFormat(rep.Itag)
		if !known {
			log.Debug("No metadata found for itag: ", rep.Itag, ", skipping...")
			continue
		}

		format.meta["url"] = rep.URL
		format.Resolution = ""
		if rep.Height != 0 {
			format.Resolution = strconv.Itoa(rep.Height) + "p"
		}
		formats = append(formats, format)
	}

	// io.EOF marks the regular end of the document; anything else is a
	// genuine failure.
	if err != io.EOF {
		return nil, err
	}
	return formats, nil
}