# Stash scraper definition for GoddessSnow.com (XPath based).
name: GoddessSnow.com

# Free-text search: the query is substituted for {} and the result page is
# parsed by the sceneSearch scraper below.
sceneByName:
  action: scrapeXPath
  scraper: sceneSearch
  queryURL: "https://www.goddesssnow.com/vod/search.php?query={}"

# We do not want to scrape the /updates URL because it has the wrong release
# date by a year (scenes get released a year early on /scenes), and the
# description is often truncated on /updates.
# /scenes also has two versions of each page: one that ends in "_vids.html"
# and one that ends in ".html". We want the "_vids.html" version because that
# is the one with the images.
# Both issues are taken care of in the queryURLReplace section.
sceneByURL:
  - action: scrapeXPath
    url:
      - goddesssnow.com/updates/
      - goddesssnow.com/vod/scenes
    queryURL: "{url}"
    queryURLReplace:
      url:
        # Convert /updates URLs to /vod/scenes.
        - regex: (.+)(\/updates\/)(.+)(\.html)
          with: $1/vod/scenes/$3.html
        # Fix up /vod/scenes URLs that do not end in _vids.html: first strip
        # the suffix from every URL, then add it back in. This adds it to the
        # /updates URLs converted above, leaves URLs that already have the
        # correct form unchanged, and fixes the "bad" /vod/scenes URLs.
        # This is a two-step process because Go regex does not support
        # backreferences, which would have allowed a single clean regex.
        - regex: _vids\.html
          with: ".html"
        - regex: \.html
          with: "_vids.html"
    scraper: sceneScraper

# Scrape from a query fragment: the fragment's URL is fetched as-is and parsed
# by sceneScraper.
sceneByQueryFragment:
  action: scrapeXPath
  queryURL: "{url}"
  scraper: sceneScraper

xPathScrapers:
  # Parses a single scene page.
  sceneScraper:
    scene:
      Title: //div[@class="title_bar"]/span/text()
      URL:
        selector: //div/@data-redirect
        postProcess:
          # Normalise to the "_vids.html" form. Strip the suffix first and
          # then append it (same technique as queryURLReplace above) so that a
          # value already ending in "_vids.html" is not turned into
          # "_vids_vids.html".
          - replace:
              - regex: _vids\.html
                with: ".html"
              - regex: \.html
                with: "_vids.html"
      Date:
        selector: //span[@class="release-date"]/text()|//div[@class="cell update_date"]/text()
        postProcess:
          # Drop the "Release Date: " label so only the date remains.
          - replace:
              - regex: ^Release Date:\s
                with: ""
          # Go reference-time layout: month/day/year.
          - parseDate: 01/02/2006
      Details: //span[@class="update_description"]
      Tags:
        Name:
          selector: //span[@class="update_tags"]/a/text()
      Image:
        selector: //div[@class="VOD_update"]/img/@src0_4x
        postProcess:
          # Prefix the selected value with the site origin.
          - replace:
              - regex: ^
                with: https://www.goddesssnow.com
      Studio:
        Name:
          fixed: Alexandra Snow
      Performers:
        Name: //span[@class="update_models"]/a

  # Parses the search result page used by sceneByName.
  sceneSearch:
    scene:
      Title: //div[@class="update_details"]/div/@data-title
      URL: //a[@class="update-details-image"]/@href
      Image:
        selector: //a[@class="update-details-image"]/img/@src0_1x
        postProcess:
          # Prefix the selected value with the site origin.
          - replace:
              - regex: ^
                with: https://www.goddesssnow.com
# Last Updated January 31, 2023