# File listing metadata: 83 lines, 3.0 KiB, YAML
# Name of this scraper definition.
name: Kpopping
# Performer lookup by name: fetches the idol profile URL built from queryURL
# and parses the page with performerScraper.
# NOTE(review): "{}" is presumably the placeholder the host application
# substitutes with the searched name - confirm against the scraper docs.
performerByName:
  action: scrapeXPath
  queryURL: https://kpopping.com/profiles/idol/{}
  scraper: performerScraper
# Performer lookup by URL: the idol profile URL listed under `url` is parsed
# with the same performerScraper used for name lookups.
performerByURL:
  - action: scrapeXPath
    url:
      - https://kpopping.com/profiles/idol/
    scraper: performerScraper
# XPath scraper definitions referenced by the performerBy* entries.
xPathScrapers:
  performerScraper:
    common:
      # Shared prefix: the profile summary container, reused below as $summary.
      $summary: //div[contains(@class, "summary-only-one")]
    performer:
      # Heading text of the summary section, with any " †" sequence removed.
      # NOTE(review): the dagger presumably marks deceased idols - confirm.
      Name:
        selector: $summary/section/h1/text()
        postProcess:
          - replace:
              - regex: " †"
                with: ""
      # Text of the second breadcrumb link, mapped onto a gender value.
      Gender:
        selector: //ol[@class="breadcrumb"]/li[2]/a/text()
        postProcess:
          - map:
              Girl groups: female
              Boy groups: male
              Female idols: female
              Male idols: male
      # Canonical link of the page.
      URL: //link[@rel="canonical"]/@href
      # Links of the form https://twitter.com/<handle>?lang=en are reduced to
      # the captured handle ($1).
      Twitter:
        selector: //a[contains(@class, "fa-twitter")]/@href
        postProcess:
          - replace:
              - regex: https://twitter.com/(\w+)\?lang=en
                with: $1
      # Links of the form https://www.instagram.com/<handle>/?hl=en are reduced
      # to the captured handle ($1).
      Instagram:
        selector: //a[contains(@class, "fa-instagram")]/@href
        postProcess:
          - replace:
              - regex: https://www.instagram.com/(\w+)/\?hl=en
                with: $1
      # Link text in the cell following the "Birthday:" label, parsed with the
      # layout "Jan 2, 2006".
      Birthdate:
        selector: $summary//div[@class="equal"][contains(., "Birthday:")]/following-sibling::div[1]/a/text()
        postProcess:
          - parseDate: Jan 2, 2006
      # Plain text in the cell following the "Passed away:" label, parsed with
      # the same date layout.
      DeathDate:
        selector: $summary//div[@class="equal"][contains(., "Passed away:")]/following-sibling::div[1]/text()
        postProcess:
          - parseDate: Jan 2, 2006
      # Paragraph text following the "Country:" label.
      Country: $summary//strong[contains(., "Country:")]/following-sibling::p/text()
      # Keeps only the digits that precede "cm" (whitespace or \xA0 may sit
      # between the number and the unit).
      Height:
        selector: $summary//div[@class="equal"][contains(., "Height:")]/following-sibling::div[1]/text()
        postProcess:
          - replace:
              - regex: '.*?(\d+)[\s\xA0]*cm.*'
                with: $1
      # Keeps only the digits that precede "kg" (same pattern as Height).
      Weight:
        selector: $summary//div[@class="equal"][contains(., "Weight:")]/following-sibling::div[1]/text()
        postProcess:
          - replace:
              - regex: '.*?(\d+)[\s\xA0]*kg.*'
                with: $1
      # Value next to "Active years:" in the data grid; "-present" is rewritten
      # to " -".
      CareerLength:
        selector: //div[contains(@class, "data-grid")]/section[@class="cell"]/div[@class="name"][contains(text(), "Active years:")]/following-sibling::div[1]/text()
        postProcess:
          - replace:
              - regex: -present
                with: " -"
      # Union of three sources (text after the "Korean" <dfn>, the "Full name:"
      # cell and the "Native name:" cell), joined with ", ".
      Aliases:
        selector: $summary//dfn[contains(text(), "Korean")]/following-sibling::text()|$summary//div[@class="equal"][contains(., "Full name:")]/following-sibling::div[1]/text()|$summary//div[@class="equal"][contains(., "Native name:")]/following-sibling::div[1]/text()
        concat: ", "
      # data-bg attribute of the "whole-body" element; the "^" pattern matches
      # the start of the value, so the site origin is prepended to it.
      Image:
        selector: $summary//div[contains(@class, "whole-body")]/@data-bg
        postProcess:
          - replace:
              - regex: ^
                with: https://kpopping.com/
      # Paragraph texts following the "Introduction to ..." heading in the
      # encyclopedia box, joined with blank lines (CRLF CRLF).
      Details:
        selector: //div[@class="box encyclopedia"]/h2[contains(., "Introduction to ")]/following-sibling::p/text()
        concat: "\r\n\r\n"
# Last Updated April 29, 2022