# Scraper definition for performer pages on babepedia.com.
name: "Babepedia"

# Search by performer name; `{}` in queryURL is the placeholder for the query.
# Results are parsed by the `performerSearch` XPath scraper.
performerByName:
  action: scrapeXPath
  queryURL: "https://www.babepedia.com/search/{}"
  scraper: performerSearch

# Scrape by URL; URLs matching the entry below are parsed by the
# `performerScraper` XPath scraper.
performerByURL:
  - action: scrapeXPath
    url:
      - "https://www.babepedia.com"
    scraper: performerScraper

xPathScrapers:
  # Parses the response of the name search. Both selectors are XPath unions:
  # one branch reads the links in the results list, the other reads the bio
  # area / og:url meta tag.
  # NOTE(review): the second branch presumably covers searches that land
  # directly on a profile page — confirm against the live site.
  performerSearch:
    performer:
      Name:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]|//div[@id="bioarea"]/h1
        postProcess:
          - replace:
              # Put a space in front of "aka" when it is followed by
              # whitespace and capital letters.
              - regex: 'aka(\s+[A-Z]+)'
                with: ' aka $1'
      URL:
        selector: //span[@class="results"]//a[contains(@href, '/babe/')]/@href|//meta[@property='og:url']/@content
        postProcess:
          - replace:
              # Step 1: unconditionally prefix the value with the site origin.
              - regex: '^'
                with: 'https://www.babepedia.com'
              # Step 2: collapse the doubled origin that step 1 produces when
              # the value already started with the origin.
              - regex: 'https:\/\/www\.babepedia\.comhttps:\/\/www\.babepedia\.com'
                with: 'https://www.babepedia.com'

  # Parses a single performer profile page (referenced by performerByURL).
  performerScraper:
    common:
      # Shared XPath prefix for the labelled rows of the bio list. The
      # predicate is deliberately left open: every use below appends
      # [text()='<Label>']] which closes both brackets.
      $label: //div[@id='bioarea']//ul/li[span[@class='label']
    performer:
      Name: //div[@id="bioarea"]/h1
      Gender:
        fixed: "Female"
      URL: //head/meta[@property="og:url"]/@content
      Twitter:
        selector: //div[@id='socialicons']//a[img[@alt='Twitter account']]/@href
        postProcess:
          - replace:
              # Strip the site prefix so only the handle remains. Accepts
              # http/https and an optional "www." host prefix.
              - regex: 'https?://(?:www\.)?twitter\.com/'
                with: ""
      Instagram:
        selector: //div[@id='socialicons']//a[img[@alt='Instagram account']]/@href
        postProcess:
          - replace:
              # Same prefix stripping as for Twitter.
              - regex: 'https?://(?:www\.)?instagram\.com/'
                with: ""
      Birthdate:
        selector: $label[text()='Born:']]//text()
        # All text nodes of the row are joined with a single space.
        concat: " "
        postProcess:
          - replace:
              # Keep only groups 5, 10 and 12 (day digits, a word, trailing
              # digits) so the result fits the "2 January 2006" layout below.
              # NOTE(review): presumably the input looks like
              # "Born: Thursday 12th of January 1989" — confirm.
              - regex: (\w+:)(\s)(\w+)(\s)(\d+)(\w+)(\s)(\w+)(\s)(\w+)(\s)(\d+)
                with: $5 $10 $12
          - parseDate: 2 January 2006
      DeathDate:
        selector: $label[text()='Died:']]/text()
        postProcess:
          - replace:
              # Drop the ordinal suffix from the day number.
              - regex: (\d+)(st|nd|rd|th)
                with: "$1"
              # Replace " of " and a trailing "(age ..." part with a space.
              - regex: (\sof\s)|(\(age.+)
                with: " "
          - parseDate: Monday 2 January 2006
      Ethnicity:
        selector: $label[text()='Ethnicity:']]/a/text()|$label[text()='Ethnicity:']]/text()
        postProcess:
          - map:
              Asian: asian
              Caucasian: white
              Black: black
              Latin: hispanic
      Country: $label[text()='Birthplace']]/a[contains(@href,'topbabespercountry')]/text()
      EyeColor:
        selector: $label[text()='Eye color:']]
        postProcess:
          - replace:
              # The selector returns the whole row; remove the label text.
              - regex: "Eye color: "
                with: ""
      HairColor:
        selector: $label[text()='Hair color:']]
        postProcess:
          - replace:
              # The selector returns the whole row; remove the label text.
              - regex: "Hair color: "
                with: ""
      Height:
        selector: $label[text()='Height:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "cm".
              - regex: ^.*\s(\d+)\scm.*$
                with: $1
      Weight:
        selector: $label[text()='Weight:']]/text()
        postProcess:
          - replace:
              # Keep only the number that precedes "kg".
              - regex: ^.*\s(\d+)\skg.*$
                with: $1
      Measurements:
        selector: $label[text()='Measurements:']]/text()|$label[text()='Bra/cup size:']]/text()
        # The measurement and bra/cup rows are joined with "|" and then
        # recombined by the regex below into <group 1><group 5>-<group 3>.
        concat: "|"
        postProcess:
          - replace:
              - regex: (\d*)([a-zA-Z-]*)(\d*-\d*)(.+?)([a-zA-Z]+)(.*)
                with: $1$5-$3
      FakeTits:
        selector: $label[text()='Boobs:']]
        postProcess:
          - replace:
              - regex: "Boobs: "
                with: ""
          - map:
              Real/Natural: "No"
              Fake/Enhanced: "Yes"
      CareerLength: $label[text()='Years active:']]/text()
      Aliases:
        selector: //div[@id='bioarea']//h2/text()
        postProcess:
          - replace:
              # Remove only the standalone word "aka"; the word boundaries
              # keep names that merely contain "aka" (e.g. "Asaka") intact.
              - regex: '\baka\b\s*'
                with: ""
              # Turn the "/" separators into a comma-separated list.
              - regex: '\/'
                with: ","
      Tattoos: $label[text()='Tattoos:']]/text()
      Piercings: $label[text()='Piercings:']]/text()
      Image:
        selector: //div[@id='profimg']//a[@class='img']/@href
        # Alternative selector will use site logo as image if none is found
        # selector: //div[@id='profimg']//a/@href
        postProcess:
          - replace:
              # A javascript:alert(...) href is swapped for the site logo path.
              - regex: javascript\:alert.+;
                with: /images/logo.png
              # Prefix the path with the site origin.
              - regex: ^
                with: https://www.babepedia.com
      Details:
        selector: //div[@class="babebanner separate"]/p//text()
        concat: "\n"
        postProcess:
          - replace:
              # Put a blank line in front of the "Performances" section.
              - regex: '\nPerformances'
                with: "\n\nPerformances"
              # Re-join lines that were split after a connecting word/comma.
              - regex: '(and|with|of|,)\n'
                with: "$1 "
              # Pull punctuation that starts a line back onto the previous one.
              - regex: '\n(,|\.|\))'
                with: "$1"
# Last Updated January 05, 2022