stash
This commit is contained in:
447
stash/config/scrapers/community/Boobpedia/Boobpedia.yml
Normal file
447
stash/config/scrapers/community/Boobpedia/Boobpedia.yml
Normal file
@@ -0,0 +1,447 @@
|
||||
name: Boobpedia
|
||||
performerByURL:
|
||||
- action: scrapeXPath
|
||||
url:
|
||||
- boobpedia.com/boobs/
|
||||
scraper: performerScraper
|
||||
performerByName:
|
||||
action: scrapeJson
|
||||
queryURL: "https://www.boobpedia.com/wiki/api.php?action=query&format=json&list=search&srwhat=title&srsearch={}"
|
||||
scraper: performerSearch
|
||||
jsonScrapers:
|
||||
performerSearch:
|
||||
performer:
|
||||
Name: query.search.#.title
|
||||
URL:
|
||||
selector: query.search.#.title
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \s
|
||||
with: "_"
|
||||
- regex: ^
|
||||
with: "https://www.boobpedia.com/boobs/"
|
||||
xPathScrapers:
|
||||
performerScraper:
|
||||
performer:
|
||||
Name: //h1
|
||||
Twitter: //table//tr/td/a[b[text()='Twitter']]/@href
|
||||
Instagram: //table//tr/td/a[b[text()='Instagram']]/@href
|
||||
Birthdate:
|
||||
selector: //table//tr/td//b[text()='Born']/../following-sibling::td/a
|
||||
concat: " "
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (.*\d\d\d\d).*
|
||||
with: $1
|
||||
- parseDate: January 2 2006
|
||||
Ethnicity:
|
||||
selector: //table//tr/td/b[text()='Ethnicity']/../following-sibling::td/a
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \[\d*\]
|
||||
with: ""
|
||||
EyeColor:
|
||||
selector: //table//tr/td/b[text()='Eye color']/../following-sibling::td
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \[\d*\]
|
||||
with: ""
|
||||
Height:
|
||||
selector: //table//tr/td/b[text()='Height']/../following-sibling::td
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (?:.+\D)?(\d+\.\d+)\Dm.+
|
||||
with: $1
|
||||
- regex: \.
|
||||
with: ""
|
||||
Weight:
|
||||
selector: //table//tr/td/b[text()='Weight']/../following-sibling::td
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (?:.+\D)?(\d+)\Dkg.+
|
||||
with: $1
|
||||
Measurements:
|
||||
selector: //table//tr/td/b[text()='Measurements']/../following-sibling::td|//table//tr/td[contains(b,'cup')]/following-sibling::td
|
||||
concat: "|"
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (\d+)-(\d+)-(\d+)[^|]*\|(\d+\S+).+ # get measurements + cup
|
||||
with: $4-$2-$3
|
||||
- regex: \|.+$ # fallback to clear non matching regexes
|
||||
with: ""
|
||||
- regex: \[\d*\] # Remove References
|
||||
with: ""
|
||||
- regex: ( in) # Remove Unit Inches
|
||||
with: ""
|
||||
FakeTits:
|
||||
selector: //table//tr/td/b[text()='Boobs']/../following-sibling::td/a
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \[\d*\] # Remove References
|
||||
with: ""
|
||||
- map:
|
||||
"Enhanced": "Fake"
|
||||
"Natural": "Natural"
|
||||
HairColor:
|
||||
selector: //table//tr/td[contains(b,'Hair')]/following-sibling::td//text()
|
||||
concat: ", "
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: (,,)
|
||||
with: ","
|
||||
- regex: ( , )
|
||||
with: " "
|
||||
- regex: \[\d*\]
|
||||
with: ""
|
||||
# nbsp; screws up the parsing, so use contains instead
|
||||
CareerLength:
|
||||
selector: //table//tr/td/b[text()[contains(.,'active')]]/../following-sibling::td
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \[\d*\] # Remove References
|
||||
with: ""
|
||||
- regex: (—|–)
|
||||
with: "-"
|
||||
- regex: (\S)-(\S)
|
||||
with: $1 - $2
|
||||
- regex: (?i)(present|current)
|
||||
with: ""
|
||||
Aliases:
|
||||
selector: //table//tr/td/b[text()[contains(.,'known')]]/../following-sibling::td
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: \[\d*\]
|
||||
with: ""
|
||||
Image:
|
||||
#selector: //table[@class="infobox"]//img/@src #alterntive image, no need for subScraper but gets lq image
|
||||
selector: //table[@class="infobox plainlinks"]//a[img[@src]]/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^
|
||||
with: https://www.boobpedia.com
|
||||
- subScraper:
|
||||
selector: //div[@id="file"]/a/@href
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: ^
|
||||
with: https://www.boobpedia.com
|
||||
URL:
|
||||
selector: //script[contains(.,"wgPageName")]
|
||||
postProcess:
|
||||
- replace:
|
||||
- regex: '.+wgPageName":"([^"]+)".+'
|
||||
with: "https://www.boobpedia.com/boobs/$1"
|
||||
Details:
|
||||
selector: //div[@class="mw-parser-output"]/p
|
||||
concat: "\n\n"
|
||||
postProcess:
|
||||
- replace:
|
||||
# Remove References
|
||||
- regex: \[\d*\]
|
||||
with: ""
|
||||
# Remove <protect> and </protect>, which appears in the details of some performers (e.g. Jenna Jameson)
|
||||
- regex: </?protect>
|
||||
with: ""
|
||||
# Remove triple line breaks
|
||||
- regex: \n\n\n
|
||||
with: "\n"
|
||||
Country:
|
||||
selector: //table//tr/td/b[text()='Nationality']/../following-sibling::td/a
|
||||
postProcess:
|
||||
- map:
|
||||
"Abkhaz": "Abkhazia"
|
||||
"Abkhazian": "Abkhazia"
|
||||
"Afghan": "Afghanistan"
|
||||
"Albanian": "Albania"
|
||||
"Algerian": "Algeria"
|
||||
"American Samoan": "American Samoa"
|
||||
"American": "United States of America"
|
||||
"Andorran": "Andorra"
|
||||
"Angolan": "Angola"
|
||||
"Anguillan": "Anguilla"
|
||||
"Antarctic": "Antarctica"
|
||||
"Antiguan": "Antigua and Barbuda"
|
||||
"Argentine": "Argentina"
|
||||
"Argentinian": "Argentina"
|
||||
"Armenian": "Armenia"
|
||||
"Aruban": "Aruba"
|
||||
"Australian": "Australia"
|
||||
"Austrian": "Austria"
|
||||
"Azerbaijani": "Azerbaijan"
|
||||
"Azeri": "Azerbaijan"
|
||||
"Bahamian": "Bahamas"
|
||||
"Bahraini": "Bahrain"
|
||||
"Bangladeshi": "Bangladesh"
|
||||
"Barbadian": "Barbados"
|
||||
"Barbudan": "Antigua and Barbuda"
|
||||
"Basotho": "Lesotho"
|
||||
"Belarusian": "Belarus"
|
||||
"Belgian": "Belgium"
|
||||
"Belizean": "Belize"
|
||||
"Beninese": "Benin"
|
||||
"Beninois": "Benin"
|
||||
"Bermudan": "Bermuda"
|
||||
"Bermudian": "Bermuda"
|
||||
"Bhutanese": "Bhutan"
|
||||
"BIOT": "British Indian Ocean Territory"
|
||||
"Bissau-Guinean": "Guinea-Bissau"
|
||||
"Bolivian": "Bolivia"
|
||||
"Bonaire": "Bonaire"
|
||||
"Bonairean": "Bonaire"
|
||||
"Bosnian": "Bosnia and Herzegovina"
|
||||
"Botswanan": "Botswana"
|
||||
"Bouvet Island": "Bouvet Island"
|
||||
"Brazilian": "Brazil"
|
||||
"British Virgin Island": "Virgin Islands British"
|
||||
"British": "United Kingdom"
|
||||
"Bruneian": "Brunei"
|
||||
"Bulgarian": "Bulgaria"
|
||||
"Burkinabé": "Burkina Faso"
|
||||
"Burmese": "Burma"
|
||||
"Burundian": "Burundi"
|
||||
"Cabo Verdean": "Cabo Verde"
|
||||
"Cambodian": "Cambodia"
|
||||
"Cameroonian": "Cameroon"
|
||||
"Canadian": "Canada"
|
||||
"Cantonese": "Hong Kong"
|
||||
"Caymanian": "Cayman Islands"
|
||||
"Central African": "Central African Republic"
|
||||
"Chadian": "Chad"
|
||||
"Channel Island": "Guernsey"
|
||||
#Channel Island: "Jersey"
|
||||
"Chilean": "Chile"
|
||||
"Chinese": "China"
|
||||
"Christmas Island": "Christmas Island"
|
||||
"Cocos Island": "Cocos (Keeling) Islands"
|
||||
"Colombian": "Colombia"
|
||||
"Comoran": "Comoros"
|
||||
"Comorian": "Comoros"
|
||||
"Congolese": "Congo"
|
||||
"Cook Island": "Cook Islands"
|
||||
"Costa Rican": "Costa Rica"
|
||||
"Croatian": "Croatia"
|
||||
"Cuban": "Cuba"
|
||||
"Curaçaoan": "Curaçao"
|
||||
"Cypriot": "Cyprus"
|
||||
"Czech": "Czech Republic"
|
||||
"Danish": "Denmark"
|
||||
"Djiboutian": "Djibouti"
|
||||
"Dominican": "Dominica"
|
||||
"Dutch": "Netherlands"
|
||||
"Ecuadorian": "Ecuador"
|
||||
"Egyptian": "Egypt"
|
||||
"Emirati": "United Arab Emirates"
|
||||
"Emiri": "United Arab Emirates"
|
||||
"Emirian": "United Arab Emirates"
|
||||
"English people": "England"
|
||||
"English": "England"
|
||||
"Equatoguinean": "Equatorial Guinea"
|
||||
"Equatorial Guinean": "Equatorial Guinea"
|
||||
"Eritrean": "Eritrea"
|
||||
"Estonian": "Estonia"
|
||||
"Ethiopian": "Ethiopia"
|
||||
"European": "European Union"
|
||||
"Falkland Island": "Falkland Islands"
|
||||
"Faroese": "Faroe Islands"
|
||||
"Fijian": "Fiji"
|
||||
"Filipino": "Philippines"
|
||||
"Finnish": "Finland"
|
||||
"Formosan": "Taiwan"
|
||||
"French Guianese": "French Guiana"
|
||||
"French Polynesian": "French Polynesia"
|
||||
"French Southern Territories": "French Southern Territories"
|
||||
"French": "France"
|
||||
"Futunan": "Wallis and Futuna"
|
||||
"Gabonese": "Gabon"
|
||||
"Gambian": "Gambia"
|
||||
"Georgian": "Georgia"
|
||||
"German": "Germany"
|
||||
"Ghanaian": "Ghana"
|
||||
"Gibraltar": "Gibraltar"
|
||||
"Greek": "Greece"
|
||||
"Greenlandic": "Greenland"
|
||||
"Grenadian": "Grenada"
|
||||
"Guadeloupe": "Guadeloupe"
|
||||
"Guamanian": "Guam"
|
||||
"Guatemalan": "Guatemala"
|
||||
"Guinean": "Guinea"
|
||||
"Guyanese": "Guyana"
|
||||
"Haitian": "Haiti"
|
||||
"Heard Island": "Heard Island and McDonald Islands"
|
||||
"Hellenic": "Greece"
|
||||
"Herzegovinian": "Bosnia and Herzegovina"
|
||||
"Honduran": "Honduras"
|
||||
"Hong Kong": "Hong Kong"
|
||||
"Hong Konger": "Hong Kong"
|
||||
"Hungarian": "Hungary"
|
||||
"Icelandic": "Iceland"
|
||||
"Indian": "India"
|
||||
"Indonesian": "Indonesia"
|
||||
"Iranian": "Iran"
|
||||
"Iraqi": "Iraq"
|
||||
"Irish": "Ireland"
|
||||
"Israeli": "Israel"
|
||||
"Israelite": "Israel"
|
||||
"Italian": "Italy"
|
||||
"Ivorian": "Ivory Coast"
|
||||
"Jamaican": "Jamaica"
|
||||
"Jan Mayen": "Jan Mayen"
|
||||
"Japanese": "Japan"
|
||||
"Jordanian": "Jordan"
|
||||
"Kazakh": "Kazakhstan"
|
||||
"Kazakhstani": "Kazakhstan"
|
||||
"Kenyan": "Kenya"
|
||||
"Kirghiz": "Kyrgyzstan"
|
||||
"Kirgiz": "Kyrgyzstan"
|
||||
"Kiribati": "Kiribati"
|
||||
"Korean": "South Korea"
|
||||
"Kosovan": "Kosovo"
|
||||
"Kosovar": "Kosovo"
|
||||
"Kuwaiti": "Kuwait"
|
||||
"Kyrgyz": "Kyrgyzstan"
|
||||
"Kyrgyzstani": "Kyrgyzstan"
|
||||
"Lao": "Lao People's Democratic Republic"
|
||||
"Laotian": "Lao People's Democratic Republic"
|
||||
"Latvian": "Latvia"
|
||||
"Lebanese": "Lebanon"
|
||||
"Lettish": "Latvia"
|
||||
"Liberian": "Liberia"
|
||||
"Libyan": "Libya"
|
||||
"Liechtensteiner": "Liechtenstein"
|
||||
"Lithuanian": "Lithuania"
|
||||
"Luxembourg": "Luxembourg"
|
||||
"Luxembourgish": "Luxembourg"
|
||||
"Macanese": "Macau"
|
||||
"Macedonian": "North Macedonia"
|
||||
"Magyar": "Hungary"
|
||||
"Mahoran": "Mayotte"
|
||||
"Malagasy": "Madagascar"
|
||||
"Malawian": "Malawi"
|
||||
"Malaysian": "Malaysia"
|
||||
"Maldivian": "Maldives"
|
||||
"Malian": "Mali"
|
||||
"Malinese": "Mali"
|
||||
"Maltese": "Malta"
|
||||
"Manx": "Isle of Man"
|
||||
"Marshallese": "Marshall Islands"
|
||||
"Martinican": "Martinique"
|
||||
"Martiniquais": "Martinique"
|
||||
"Mauritanian": "Mauritania"
|
||||
"Mauritian": "Mauritius"
|
||||
"McDonald Islands": "Heard Island and McDonald Islands"
|
||||
"Mexican": "Mexico"
|
||||
"Moldovan": "Moldova"
|
||||
"Monacan": "Monaco"
|
||||
"Mongolian": "Mongolia"
|
||||
"Montenegrin": "Montenegro"
|
||||
"Montserratian": "Montserrat"
|
||||
"Monégasque": "Monaco"
|
||||
"Moroccan": "Morocco"
|
||||
"Motswana": "Botswana"
|
||||
"Mozambican": "Mozambique"
|
||||
"Myanma": "Myanmar"
|
||||
"Namibian": "Namibia"
|
||||
"Nauruan": "Nauru"
|
||||
"Nepalese": "Nepal"
|
||||
"Nepali": "Nepal"
|
||||
"Netherlandic": "Netherlands"
|
||||
"New Caledonian": "New Caledonia"
|
||||
"New Zealand": "New Zealand"
|
||||
"Ni-Vanuatu": "Vanuatu"
|
||||
"Nicaraguan": "Nicaragua"
|
||||
"Nigerian": "Nigeria"
|
||||
"Nigerien": "Niger"
|
||||
"Niuean": "Niue"
|
||||
"Norfolk Island": "Norfolk Island"
|
||||
"Northern Irish": "Northern Ireland"
|
||||
"Northern Marianan": "Northern Mariana Islands"
|
||||
"Norwegian": "Norway"
|
||||
"Omani": "Oman"
|
||||
"Pakistani": "Pakistan"
|
||||
"Palauan": "Palau"
|
||||
"Palestinian": "Palestine"
|
||||
"Panamanian": "Panama"
|
||||
"Papua New Guinean": "Papua New Guinea"
|
||||
"Papuan": "Papua New Guinea"
|
||||
"Paraguayan": "Paraguay"
|
||||
"Persian": "Iran"
|
||||
"Peruvian": "Peru"
|
||||
"Philippine": "Philippines"
|
||||
"Pitcairn Island": "Pitcairn Islands"
|
||||
"Polish": "Poland"
|
||||
"Portuguese": "Portugal"
|
||||
"Puerto Rican": "Puerto Rico"
|
||||
"Qatari": "Qatar"
|
||||
"Romanian": "Romania"
|
||||
"Russian": "Russia"
|
||||
"Rwandan": "Rwanda"
|
||||
"Saba": "Saba"
|
||||
"Saban": "Saba"
|
||||
"Sahraouian": "Western Sahara"
|
||||
"Sahrawi": "Western Sahara"
|
||||
"Sahrawian": "Western Sahara"
|
||||
"Salvadoran": "El Salvador"
|
||||
"Sammarinese": "San Marino"
|
||||
"Samoan": "Samoa"
|
||||
"Saudi Arabian": "Saudi Arabia"
|
||||
"Saudi": "Saudi Arabia"
|
||||
"Scottish": "Scotland"
|
||||
"Senegalese": "Senegal"
|
||||
"Serbian": "Serbia"
|
||||
"Seychellois": "Seychelles"
|
||||
"Sierra Leonean": "Sierra Leone"
|
||||
"Singapore": "Singapore"
|
||||
"Singaporean": "Singapore"
|
||||
"Slovak": "Slovakia"
|
||||
"Slovene": "Slovenia"
|
||||
"Slovenian": "Slovenia"
|
||||
"Solomon Island": "Solomon Islands"
|
||||
"Somali": "Somalia"
|
||||
"Somalilander": "Somaliland"
|
||||
"South African": "South Africa"
|
||||
"South Georgia Island": "South Georgia and the South Sandwich Islands"
|
||||
"South Ossetian": "South Ossetia"
|
||||
"South Sandwich Island": "South Georgia and the South Sandwich Islands"
|
||||
"South Sudanese": "South Sudan"
|
||||
"Spanish": "Spain"
|
||||
"Sri Lankan": "Sri Lanka"
|
||||
"Sudanese": "Sudan"
|
||||
"Surinamese": "Suriname"
|
||||
"Svalbard resident": "Svalbard"
|
||||
"Swati": "Eswatini"
|
||||
"Swazi": "Eswatini"
|
||||
"Swedish": "Sweden"
|
||||
"Swiss": "Switzerland"
|
||||
"Syrian": "Syrian Arab Republic"
|
||||
"Taiwanese": "Taiwan"
|
||||
"Tajikistani": "Tajikistan"
|
||||
"Tanzanian": "Tanzania"
|
||||
"Thai": "Thailand"
|
||||
"Timorese": "Timor-Leste"
|
||||
"Tobagonian": "Trinidad and Tobago"
|
||||
"Togolese": "Togo"
|
||||
"Tokelauan": "Tokelau"
|
||||
"Tongan": "Tonga"
|
||||
"Trinidadian": "Trinidad and Tobago"
|
||||
"Tunisian": "Tunisia"
|
||||
"Turkish": "Turkey"
|
||||
"Turkmen": "Turkmenistan"
|
||||
"Turks and Caicos Island": "Turks and Caicos Islands"
|
||||
"Tuvaluan": "Tuvalu"
|
||||
"Ugandan": "Uganda"
|
||||
"Ukrainian": "Ukraine"
|
||||
"Uruguayan": "Uruguay"
|
||||
"Uzbek": "Uzbekistan"
|
||||
"Uzbekistani": "Uzbekistan"
|
||||
"Vanuatuan": "Vanuatu"
|
||||
"Vatican": "Vatican City State"
|
||||
"Venezuelan": "Venezuela"
|
||||
"Vietnamese": "Vietnam"
|
||||
"Wallis and Futuna": "Wallis and Futuna"
|
||||
"Wallisian": "Wallis and Futuna"
|
||||
"Welsh": "Wales"
|
||||
"Yemeni": "Yemen"
|
||||
"Zambian": "Zambia"
|
||||
"Zimbabwean": "Zimbabwe"
|
||||
"Åland Island": "Åland Islands"
|
||||
# Last Updated December 30, 2022
|
||||
Reference in New Issue
Block a user