This commit is contained in:
Christoph Califice
2025-10-09 20:05:31 -03:00
parent ed22ef22bc
commit 0a5f88d75a
1442 changed files with 101562 additions and 0 deletions

View File

@@ -0,0 +1,153 @@
from inspect import stack
from pathlib import Path
import py_common.log as log
def get_config(default: str | None = None) -> "CustomConfig":
    """
    Load the config for the currently executing script, using *default* as a fallback.

    This allows scrapers to define their own configuration options in a way that lets
    them persist across reinstalls. The default config must have the same format as a
    simple .ini config file consisting of key-value pairs separated by an equals sign,
    optionally interleaved with comments and blank lines for readability.
    """
    config = CustomConfig(default)
    if not default:
        log.warning("No config specified")
        return config
    # Note: chained configs were removed until we find a use case for them
    # The paths of every script in the callstack: in the above example this would be:
    # this script the api script the site script
    # "/scrapers/py_common/util.py", "/scrapers/api/scraper.py", "/scrapers/site/site.py"
    # In a single script scraper this would just be:
    # this script the site script
    # "/scrapers/py_common/util.py", "/scrapers/site/site.py"
    # Frames with filenames like "<string>" come from exec/interactive code and are skipped
    caller_files = [frame.filename for frame in stack() if not frame.filename.startswith("<")]
    if len(caller_files) < 2:
        log.warning(
            "Expected at least 2 paths in the stack: "
            "the current file and the script that called it"
        )
        log.warning("Not persisting config")
        return config
    # We can output the path of the script that called this function
    # to help with debugging config issues
    caller = Path(caller_files[1]).absolute()
    prefix = str(Path(caller.parent.name, caller.name))
    # The config lives next to the immediate caller of this function
    # (see git history if you want the chained configs version)
    config_path = Path(caller_files[1]).parent / "config.ini"
    if config_path.exists():
        log.debug(f"[{prefix}] Reading config from {config_path}")
        config.update(config_path.read_text())
    else:
        log.debug(f"[{prefix}] First run, creating default config at {config_path}")
        config_path.write_text(str(config))
    return config
class Chunk:
    """
    One parsed config setting: a key, its parsed value, and the comment/blank
    lines that preceded it.

    `key`/`value` stay None if `raw` contains no "key = value" line.
    """

    def __init__(self, raw: list[str]):
        self.comments = []
        self.key = self.value = None
        for line in raw:
            if not line or line.startswith("#"):
                # Blank lines and comments are kept so they can be written back out
                self.comments.append(line)
            elif "=" in line:
                key, value = [x.strip() for x in line.split("=", 1)]
                if not key.isidentifier():
                    log.warning(f"Config key '{key}' is not a valid identifier")
                self.key = key
                self.value = self.__parse_value(value)
            else:
                log.warning(f"Ignoring invalid config line: {line}")

    def __parse_value(self, value):
        """Coerce a raw string into bool, float or int; fall back to the string itself."""
        if value.lower() == "true":
            return True
        elif value.lower() == "false":
            return False
        elif "." in value:
            try:
                return float(value)
            except ValueError:
                return value
        # int() also accepts signed numbers like "-5", which the previous
        # str.isdigit check rejected even though "-5.0" already parsed as float
        try:
            return int(value)
        except ValueError:
            return value
def chunkify(config_string):
    """
    Split a config string into complete Chunks plus any trailing
    comment/blank lines that were not followed by a setting.
    """
    chunks = []
    pending = []
    if not config_string:
        return chunks, pending
    for lineno, raw in enumerate(config_string.strip().splitlines()):
        stripped = raw.strip()
        pending.append(stripped)
        if "=" in stripped:
            # A key-value line completes the current chunk
            chunks.append(Chunk(pending))
            pending = []
        elif stripped and not stripped.startswith("#"):
            log.warning(f"Ignoring invalid config line {lineno}: {stripped}")
    return chunks, pending
class CustomConfig:
    """
    Custom config parser that stores comments associated with each key
    Settings must be in the format:
    ```ini
    # optional comment
    key = value
    ```
    """

    def __init__(self, config_string: str | None = None):
        chunks, trailing = chunkify(config_string)
        # Inserted in reverse so that __str__, which walks the dict backwards,
        # emits the settings in their original order
        self.config_dict = {}
        for chunk in reversed(chunks):
            self.config_dict[chunk.key] = chunk.value
        self.comments = {chunk.key: chunk.comments for chunk in chunks}
        self.trailing_comments = trailing

    def update(self, config_string: str):
        """Merge settings from config_string; comments of existing keys are preserved."""
        fresh_chunks, fresh_trailing = chunkify(config_string)
        for chunk in fresh_chunks:
            if chunk.key not in self.config_dict:
                self.comments[chunk.key] = chunk.comments
            self.config_dict[chunk.key] = chunk.value
        for comment in fresh_trailing:
            if comment not in self.trailing_comments:
                self.trailing_comments.append(comment)

    def __getattr__(self, name):
        if name not in self.config_dict:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )
        return self.config_dict[name]

    def __getitem__(self, name):
        return self.config_dict[name]

    def __str__(self):
        "Generate a string representation of the configuration"
        rendered = []
        # Reversed to undo the reversed insertion order from __init__
        for key, value in reversed(self.config_dict.items()):
            rendered.extend(self.comments[key])
            rendered.append(f"{key} = {value}")
        rendered.extend(reversed(self.trailing_comments))
        return "\n".join(rendered)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,39 @@
import sys
import re
# Log messages sent from a script scraper instance are transmitted via stderr and are
# encoded with a prefix consisting of special character SOH, then the log
# level (one of t, d, i, w or e - corresponding to trace, debug, info,
# warning and error levels respectively), then special character
# STX.
#
# The log.trace, log.debug, log.info, log.warning, and log.error methods, and their equivalent
# formatted methods are intended for use by script scraper instances to transmit log
# messages.
#
def __log(level_char: bytes, s):
    """
    Write *s* to stderr one line at a time, framed with Stash's log protocol:
    SOH + level character + STX before every line.
    """
    if level_char:
        prefix = "\x01{}\x02".format(level_char.decode())
    # Redact base64-encoded images so they don't flood the log output
    text = re.sub(r"data:image.+?;base64(.+?')", "[...]", str(s))
    for line in text.split("\n"):
        print(prefix, line, file=sys.stderr, flush=True)


def trace(s):
    """Log at trace level."""
    __log(b"t", s)


def debug(s):
    """Log at debug level."""
    __log(b"d", s)


def info(s):
    """Log at info level."""
    __log(b"i", s)


def warning(s):
    """Log at warning level."""
    __log(b"w", s)


def error(s):
    """Log at error level."""
    __log(b"e", s)

View File

@@ -0,0 +1,13 @@
# Package manifest for the py_common helper library.
# NOTE(review): appears to be a CommunityScrapers package index entry listing
# the files the scraper installer should fetch — confirm against the repo docs.
id: py_common
name: py_common
metadata: {}
version: 1bd9130
date: "2024-02-27 15:12:39"
requires: []
source_repository: https://stashapp.github.io/CommunityScrapers/stable/index.yml
files:
- util.py
- graphql.py
- types.py
- log.py
- config.py

View File

@@ -0,0 +1,118 @@
from typing import Literal, Required, TypedDict
"""
Types for outputs that scrapers can produce and that Stash will accept
"""
class ScrapedTag(TypedDict):
    """A tag as accepted by Stash's scraper interface."""

    name: str
    "Name is the only required field"
class ScrapedPerformer(TypedDict, total=False):
    """Performer fields Stash accepts from a scraper; only `name` is required."""

    name: Required[str]
    "Name is the only required field"
    disambiguation: str
    "This is only added through Tagger view"
    gender: Literal["MALE", "FEMALE", "TRANSGENDER_MALE", "TRANSGENDER_FEMALE", "INTERSEX", "NON_BINARY"]
    url: str
    twitter: str
    instagram: str
    birthdate: str
    "Must be in the format YYYY-MM-DD"
    death_date: str
    "Must be in the format YYYY-MM-DD"
    ethnicity: Literal["CAUCASIAN", "BLACK", "ASIAN", "INDIAN", "LATIN", "MIDDLE_EASTERN", "MIXED", "OTHER"]
    country: str
    "Not validated"
    eye_color: Literal["BLUE", "BROWN", "GREEN", "GREY", "HAZEL", "RED"]
    hair_color: Literal["BLONDE", "BRUNETTE", "BLACK", "RED", "AUBURN", "GREY", "BALD", "VARIOUS", "OTHER"]
    "Hair color, can be 'VARIOUS' or 'OTHER' if the performer has multiple hair colors"
    height: str
    "Height in centimeters"
    weight: str
    "Weight in kilograms"
    measurements: str
    "bust-waist-hip measurements in centimeters, with optional cupsize for bust (e.g. 90-60-90, 90C-60-90)"
    fake_tits: str
    penis_length: str
    circumcised: str
    career_length: str
    tattoos: str
    piercings: str
    aliases: str
    "Must be comma-delimited in order to be parsed correctly"
    tags: list[ScrapedTag]
    image: str
    images: list[str]
    "Images can be URLs or base64-encoded images"
    details: str
class ScrapedStudio(TypedDict, total=False):
    """Studio fields Stash accepts from a scraper; only `name` is required."""

    name: Required[str]
    "Name is the only required field"
    url: str
    # Forward reference: a studio can nest its parent studio
    parent: 'ScrapedStudio'
    image: str
class ScrapedMovie(TypedDict, total=False):
    """Movie fields Stash accepts from a scraper; all fields are optional."""

    name: str
    date: str
    "Must be in the format YYYY-MM-DD"
    duration: str
    "Duration in seconds"
    director: str
    synopsis: str
    studio: ScrapedStudio
    rating: str
    front_image: str
    back_image: str
    url: str
    aliases: str
class ScrapedGallery(TypedDict, total=False):
    """Gallery fields Stash accepts from a scraper; all fields are optional."""

    title: str
    details: str
    url: str
    urls: list[str]
    date: str
    "Must be in the format YYYY-MM-DD"
    studio: ScrapedStudio
    tags: list[ScrapedTag]
    performers: list[ScrapedPerformer]
    code: str
    photographer: str
class ScrapedScene(TypedDict, total=False):
    """Scene fields Stash accepts from a scraper; all fields are optional."""

    title: str
    details: str
    url: str
    urls: list[str]
    date: str
    image: str
    studio: ScrapedStudio
    movies: list[ScrapedMovie]
    tags: list[ScrapedTag]
    performers: list[ScrapedPerformer]
    code: str
    director: str
# Technically we can return a full ScrapedPerformer but the current UI only
# shows the name. The URL is absolutely necessary for the result to be used
# in the next step: actually scraping the performer
class PerformerSearchResult(TypedDict):
    """Minimal performer search result: the UI shows the name, and the URL
    is needed for the follow-up performer-by-url scrape."""

    name: str
    url: str
# Technically we can return a full ScrapedScene but the current UI only
# shows the name, image, studio, tags and performers. The URL is absolutely
# necessary for the result to be used in the next step: actually scraping the scene
class SceneSearchResult(TypedDict, total=False):
    """Scene search result: title and URL are required so the result can be
    used in the follow-up scene-by-url scrape."""

    title: Required[str]
    url: Required[str]
    date: str
    "Must be in the format YYYY-MM-DD"
    image: str
    "Image can be a URL or base64-encoded image"
    tags: list[ScrapedTag]
    performers: list[ScrapedPerformer]
    studio: ScrapedStudio

View File

@@ -0,0 +1,676 @@
from argparse import ArgumentParser
from functools import reduce
from typing import Any, Callable, TypeVar
from urllib.error import URLError
from urllib.request import Request, urlopen
import json
import sys
def dig(c: dict | list, *keys: str | int | tuple[str | int, ...], default=None) -> Any:
"""
Helper function to get a value from a nested dict or list
If a key is a tuple the items will be tried in order until a value is found
:param c: dict or list to search
:param keys: keys to search for
:param default: default value to return if not found
:return: value if found, None otherwise
>>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}}
>>> dig(obj, "a", "b", 1)
'd'
>>> dig(obj, "a", ("e", "f"), "g")
'h'
"""
def inner(d: dict | list, key: str | int | tuple):
if isinstance(d, dict):
if isinstance(key, tuple):
for k in key:
if k in d:
return d[k]
return d.get(key)
elif isinstance(d, list) and isinstance(key, int) and key < len(d):
return d[key]
else:
return default
return reduce(inner, keys, c) # type: ignore
T = TypeVar("T")


def replace_all(obj: dict, key: str, replacement: Callable[[T], T]) -> dict:
    """
    Helper function to recursively replace values in a nested dict, returning a new dict

    If the key refers to a list the replacement function will be called for each item

    :param obj: dict to search
    :param key: key to search for
    :param replacement: function called on the value to replace it
    :return: new dict

    >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}}
    >>> replace_all(obj, "g", lambda x: x.upper())  # Replace a single item
    {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}}
    >>> replace_all(obj, "b", lambda x: x.upper())  # Replace all items in a list
    {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}}
    >>> replace_all(obj, "z", lambda x: x.upper())  # Do nothing if the key is not found
    {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}}
    """
    # Non-dict leaves are returned unchanged (also guards the recursive calls)
    if not isinstance(obj, dict):
        return obj
    new = {}
    for k, v in obj.items():
        if k == key:
            if isinstance(v, list):
                new[k] = [replacement(x) for x in v]
            else:
                new[k] = replacement(v)
        elif isinstance(v, dict):
            new[k] = replace_all(v, key, replacement)
        elif isinstance(v, list):
            new[k] = [replace_all(x, key, replacement) for x in v]
        else:
            new[k] = v
    return new
def replace_at(obj: dict, *path: str, replacement: Callable[[T], T]) -> dict:
    """
    Helper function to replace a value at a given path in a nested dict, returning a new dict

    If the path refers to a list the replacement function will be called for each item
    If the path does not exist, the replacement function will not be called and the dict will be returned as-is

    :param obj: dict to search
    :param path: path to search for
    :param replacement: function called on the value to replace it
    :return: new dict

    >>> obj = {"a": {"b": ["c", "d"], "f": {"g": "h"}}}
    >>> replace_at(obj, "a", "f", "g", replacement=lambda x: x.upper())  # Replace a single item
    {'a': {'b': ['c', 'd'], 'f': {'g': 'H'}}}
    >>> replace_at(obj, "a", "b", replacement=lambda x: x.upper())  # Replace all items in a list
    {'a': {'b': ['C', 'D'], 'f': {'g': 'h'}}}
    >>> replace_at(obj, "a", "z", "g", replacement=lambda x: x.upper())  # Broken path, do nothing
    {'a': {'b': ['c', 'd'], 'f': {'g': 'h'}}}
    """
    def descend(node, *remaining):
        # Broken or exhausted path: return the node untouched
        if not remaining or not isinstance(node, dict) or remaining[0] not in node:
            return node
        head, *tail = remaining
        if tail:
            return {**node, head: descend(node[head], *tail)}
        target = node[head]
        if isinstance(target, list):
            return {**node, head: [replacement(item) for item in target]}
        return {**node, head: replacement(target)}

    return descend(obj, *path)  # type: ignore
def is_valid_url(url, timeout=10):
    """
    Checks if an URL is valid by making a HEAD request and ensuring the response code is 2xx

    :param url: URL to check
    :param timeout: seconds to wait for a response before giving up
    :return: True if the URL responds with a 2xx status code, False otherwise
    """
    try:
        req = Request(url, method="HEAD")
        with urlopen(req, timeout=timeout) as response:
            return 200 <= response.getcode() < 300
    # ValueError covers malformed URLs (e.g. missing scheme), which Request
    # raises before any network access; previously this escaped the caller
    except (URLError, ValueError):
        return False
def __default_parser(**kwargs):
    """Build the argument parser shared by all scrapers: one subcommand per operation."""
    parser = ArgumentParser(**kwargs)
    # Some scrapers can take extra arguments so we can
    # do rudimentary configuration in the YAML file
    parser.add_argument("extra", nargs="*")
    operations = parser.add_subparsers(dest="operation", required=True)

    def operation(name, help):
        # One subcommand per scrape operation Stash can invoke
        return operations.add_parser(name, help=help)

    # "Scrape with..." and the subsequent search box
    operation("performer-by-name", "Search for performers").add_argument(
        "--name", help="Performer name to search for"
    )
    # The results of performer-by-name will be passed to this
    pbf = operation("performer-by-fragment", "Scrape a performer")
    # Technically there's more information in this fragment,
    # but in 99.9% of cases we only need the URL or the name
    pbf.add_argument("--url", help="Scene URL")
    pbf.add_argument("--name", help="Performer name to search for")
    # Filling in an URL and hitting the "Scrape" icon
    operation("performer-by-url", "Scrape a performer by their URL").add_argument("--url")
    # Filling in an URL and hitting the "Scrape" icon
    operation("movie-by-url", "Scrape a movie by its URL").add_argument("--url")
    # The looking glass search icon; name field is guaranteed to be filled by Stash
    operation("scene-by-name", "Scrape a scene by name").add_argument(
        "--name", help="Name to search for"
    )
    # Filling in an URL and hitting the "Scrape" icon
    operation("scene-by-url", "Scrape a scene by its URL").add_argument("--url")
    # "Scrape with..."
    sbf = operation("scene-by-fragment", "Scrape a scene")
    sbf.add_argument("-u", "--url")
    # Title will be filename if not set in Stash
    for flag in ("--id", "--title", "--date", "--details"):
        sbf.add_argument(flag)
    sbf.add_argument("--urls", nargs="+")
    # Tagger view or search box
    sbqf = operation("scene-by-query-fragment", "Scrape a scene")
    sbqf.add_argument("-u", "--url")
    # Title will be filename if not set in Stash
    for flag in ("--id", "--title", "--code", "--details", "--director", "--date"):
        sbqf.add_argument(flag)
    sbqf.add_argument("--urls", nargs="+")
    # Filling in an URL and hitting the "Scrape" icon
    operation("gallery-by-url", "Scrape a gallery by its URL").add_argument(
        "--url", help="Gallery URL"
    )
    # "Scrape with..."
    gbf = operation("gallery-by-fragment", "Scrape a gallery")
    gbf.add_argument("-u", "--url")
    for flag in ("--id", "--title", "--date", "--details"):
        gbf.add_argument(flag)
    gbf.add_argument("--urls", nargs="+")
    return parser


def scraper_args(**kwargs):
    """
    Helper function to parse arguments for a scraper

    This allows scrapers to be called from the command line without
    piping JSON to stdin but also from Stash

    Returns a tuple of the operation and the parsed arguments: operation is one of
    - performer-by-name
    - performer-by-fragment
    - performer-by-url
    - movie-by-url
    - scene-by-name
    - scene-by-url
    - scene-by-fragment
    - scene-by-query-fragment
    - gallery-by-url
    - gallery-by-fragment

    A scraper can be configured to take extra arguments by adding them to the YAML file:
    ```yaml
    sceneByName:
      action: script
      script:
        - python
        - my-scraper.py
        - extra
        - args
        - scene-by-name
    ```
    When called from Stash through the above configuration this function would return:
    ```python
    ("scene-by-name", {"extra": ["extra", "args"], "name": "scene name"})
    ```
    """
    args = vars(__default_parser(**kwargs).parse_args())
    # If stdin is not connected to a TTY the script is being executed by Stash,
    # which pipes the scrape fragment in as JSON
    if not sys.stdin.isatty():
        try:
            args.update(json.load(sys.stdin))
        except json.decoder.JSONDecodeError:
            # This would only happen if Stash passed invalid JSON
            sys.exit(69)
    return args.pop("operation"), args
def guess_nationality(country: str) -> str:
    """
    Tries to guess the country from a string

    Returns the original string if no match is found
    """
    for part in country.split(","):
        normalized = part.strip().lower()
        # demonyms values are never None, so .get is a safe membership test
        if (match := demonyms.get(normalized)) is not None:
            return match
    return country
# US state codes and full names; every entry is mapped to "USA" at the bottom
# of `demonyms` so that e.g. "California" or "CA" resolves to a country
US_states = [
    "AK",
    "AL",
    "AR",
    "AZ",
    "CA",
    "CO",
    "CT",
    "DC",
    "DE",
    "FL",
    "GA",
    "HI",
    "IA",
    "ID",
    "IL",
    "IN",
    "KS",
    "KY",
    "LA",
    "MA",
    "MD",
    "ME",
    "MI",
    "MN",
    "MO",
    "MS",
    "MT",
    "NC",
    "ND",
    "NE",
    "NH",
    "NJ",
    "NM",
    "NV",
    "NY",
    "OH",
    "OK",
    "OR",
    "PA",
    "RI",
    "SC",
    "SD",
    "TN",
    "TX",
    "UT",
    "VA",
    "VT",
    "WA",
    "WI",
    "WV",
    "WY",
    "Alabama",
    "Alaska",
    "Arizona",
    "Arkansas",
    "California",
    "Colorado",
    "Connecticut",
    "Delaware",
    "Florida",
    "Georgia",
    "Hawaii",
    "Idaho",
    "Illinois",
    "Indiana",
    "Iowa",
    "Kansas",
    "Kentucky",
    "Louisiana",
    "Maine",
    "Maryland",
    "Massachusetts",
    "Michigan",
    "Minnesota",
    "Mississippi",
    "Missouri",
    "Montana",
    "Nebraska",
    "Nevada",
    "New Hampshire",
    "New Jersey",
    "New Mexico",
    "New York",
    "North Carolina",
    "North Dakota",
    "Ohio",
    "Oklahoma",
    "Oregon",
    "Pennsylvania",
    "Rhode Island",
    "South Carolina",
    "South Dakota",
    "Tennessee",
    "Texas",
    "Utah",
    "Vermont",
    "Virginia",
    "Washington",
    "West Virginia",
    "Wisconsin",
    "Wyoming",
]
# Lowercase demonym/place name -> country name; keys must be lowercase because
# guess_nationality lowercases its input before looking it up here
demonyms = {
    # https://en.wikipedia.org/wiki/List_of_adjectival_and_demonymic_forms_for_countries_and_nations
    "abkhaz": "Abkhazia",
    "abkhazian": "Abkhazia",
    "afghan": "Afghanistan",
    "african american": "USA",
    "albanian": "Albania",
    "algerian": "Algeria",
    "american samoan": "American Samoa",
    "american": "USA",
    "andorran": "Andorra",
    "angolan": "Angola",
    "anguillan": "Anguilla",
    "antarctic": "Antarctica",
    "antiguan": "Antigua and Barbuda",
    "argentine": "Argentina",
    "argentinian": "Argentina",
    "armenian": "Armenia",
    "aruban": "Aruba",
    "australian": "Australia",
    "austrian": "Austria",
    "azerbaijani": "Azerbaijan",
    "azeri": "Azerbaijan",
    "bahamian": "Bahamas",
    "bahraini": "Bahrain",
    "bangladeshi": "Bangladesh",
    "barbadian": "Barbados",
    "barbudan": "Antigua and Barbuda",
    "basotho": "Lesotho",
    "belarusian": "Belarus",
    "belgian": "Belgium",
    "belizean": "Belize",
    "beninese": "Benin",
    "beninois": "Benin",
    "bermudan": "Bermuda",
    "bermudian": "Bermuda",
    "bhutanese": "Bhutan",
    "biot": "British Indian Ocean Territory",
    "bissau-guinean": "Guinea-Bissau",
    "bolivian": "Bolivia",
    "bonaire": "Bonaire",
    "bonairean": "Bonaire",
    "bosnian": "Bosnia and Herzegovina",
    "botswanan": "Botswana",
    "bouvet island": "Bouvet Island",
    "brazilian": "Brazil",
    "british virgin island": "Virgin Islands, British",
    "british": "United Kingdom",
    "bruneian": "Brunei",
    "bulgarian": "Bulgaria",
    "burkinabé": "Burkina Faso",
    "burmese": "Burma",
    "burundian": "Burundi",
    "cabo verdean": "Cabo Verde",
    "cambodian": "Cambodia",
    "cameroonian": "Cameroon",
    "canadian": "Canada",
    "cantonese": "Hong Kong",
    "caymanian": "Cayman Islands",
    "central african": "Central African Republic",
    "chadian": "Chad",
    "channel island": "Guernsey",
    "chilean": "Chile",
    "chinese": "China",
    "christmas island": "Christmas Island",
    "cocos island": "Cocos (Keeling) Islands",
    "colombian": "Colombia",
    "comoran": "Comoros",
    "comorian": "Comoros",
    "congolese": "Congo",
    "cook island": "Cook Islands",
    "costa rican": "Costa Rica",
    "croatian": "Croatia",
    "cuban": "Cuba",
    "curaçaoan": "Curaçao",
    "cypriot": "Cyprus",
    "czech": "Czech Republic",
    "danish": "Denmark",
    "djiboutian": "Djibouti",
    "dominican": "Dominica",
    "dutch": "Netherlands",
    "ecuadorian": "Ecuador",
    "egyptian": "Egypt",
    "emirati": "United Arab Emirates",
    "emiri": "United Arab Emirates",
    "emirian": "United Arab Emirates",
    "english people": "England",
    "english": "England",
    "equatoguinean": "Equatorial Guinea",
    "equatorial guinean": "Equatorial Guinea",
    "eritrean": "Eritrea",
    "estonian": "Estonia",
    "ethiopian": "Ethiopia",
    "european": "European Union",
    "falkland island": "Falkland Islands",
    "faroese": "Faroe Islands",
    "fijian": "Fiji",
    "filipino": "Philippines",
    "finnish": "Finland",
    "formosan": "Taiwan",
    "french guianese": "French Guiana",
    "french polynesian": "French Polynesia",
    "french southern territories": "French Southern Territories",
    "french": "France",
    "futunan": "Wallis and Futuna",
    "gabonese": "Gabon",
    "gambian": "Gambia",
    "georgian": "Georgia",
    "german": "Germany",
    "ghanaian": "Ghana",
    "gibraltar": "Gibraltar",
    "greek": "Greece",
    "greenlandic": "Greenland",
    "grenadian": "Grenada",
    "guadeloupe": "Guadeloupe",
    "guamanian": "Guam",
    "guatemalan": "Guatemala",
    "guinean": "Guinea",
    "guyanese": "Guyana",
    "haitian": "Haiti",
    "heard island": "Heard Island and McDonald Islands",
    "hellenic": "Greece",
    "herzegovinian": "Bosnia and Herzegovina",
    "honduran": "Honduras",
    "hong kong": "Hong Kong",
    "hong konger": "Hong Kong",
    "hungarian": "Hungary",
    "icelandic": "Iceland",
    "indian": "India",
    "indonesian": "Indonesia",
    "iranian": "Iran",
    "iraqi": "Iraq",
    "irish": "Ireland",
    "israeli": "Israel",
    "israelite": "Israel",
    "italian": "Italy",
    "ivorian": "Ivory Coast",
    "jamaican": "Jamaica",
    "jan mayen": "Jan Mayen",
    "japanese": "Japan",
    "jordanian": "Jordan",
    "kazakh": "Kazakhstan",
    "kazakhstani": "Kazakhstan",
    "kenyan": "Kenya",
    "kirghiz": "Kyrgyzstan",
    "kirgiz": "Kyrgyzstan",
    "kiribati": "Kiribati",
    "korean": "South Korea",
    "kosovan": "Kosovo",
    "kosovar": "Kosovo",
    "kuwaiti": "Kuwait",
    "kyrgyz": "Kyrgyzstan",
    "kyrgyzstani": "Kyrgyzstan",
    "lao": "Lao People's Democratic Republic",
    "laotian": "Lao People's Democratic Republic",
    "latvian": "Latvia",
    "lebanese": "Lebanon",
    "lettish": "Latvia",
    "liberian": "Liberia",
    "libyan": "Libya",
    "liechtensteiner": "Liechtenstein",
    "lithuanian": "Lithuania",
    "luxembourg": "Luxembourg",
    "luxembourgish": "Luxembourg",
    "macanese": "Macau",
    "macedonian": "North Macedonia",
    "magyar": "Hungary",
    "mahoran": "Mayotte",
    "malagasy": "Madagascar",
    "malawian": "Malawi",
    "malaysian": "Malaysia",
    "maldivian": "Maldives",
    "malian": "Mali",
    "malinese": "Mali",
    "maltese": "Malta",
    "manx": "Isle of Man",
    "marshallese": "Marshall Islands",
    "martinican": "Martinique",
    "martiniquais": "Martinique",
    "mauritanian": "Mauritania",
    "mauritian": "Mauritius",
    "mcdonald islands": "Heard Island and McDonald Islands",
    "mexican": "Mexico",
    "moldovan": "Moldova",
    "monacan": "Monaco",
    "mongolian": "Mongolia",
    "montenegrin": "Montenegro",
    "montserratian": "Montserrat",
    "monégasque": "Monaco",
    "moroccan": "Morocco",
    "motswana": "Botswana",
    "mozambican": "Mozambique",
    "myanma": "Myanmar",
    "namibian": "Namibia",
    "nauruan": "Nauru",
    "nepalese": "Nepal",
    "nepali": "Nepal",
    "netherlandic": "Netherlands",
    "new caledonian": "New Caledonia",
    "new zealand": "New Zealand",
    "ni-vanuatu": "Vanuatu",
    "nicaraguan": "Nicaragua",
    "nigerian": "Nigeria",
    "nigerien": "Niger",
    "niuean": "Niue",
    "norfolk island": "Norfolk Island",
    "northern irish": "Northern Ireland",
    "northern marianan": "Northern Mariana Islands",
    "norwegian": "Norway",
    "omani": "Oman",
    "pakistani": "Pakistan",
    "palauan": "Palau",
    "palestinian": "Palestine",
    "panamanian": "Panama",
    "papua new guinean": "Papua New Guinea",
    "papuan": "Papua New Guinea",
    "paraguayan": "Paraguay",
    "persian": "Iran",
    "peruvian": "Peru",
    "philippine": "Philippines",
    "pitcairn island": "Pitcairn Islands",
    "polish": "Poland",
    "portuguese": "Portugal",
    "puerto rican": "Puerto Rico",
    "qatari": "Qatar",
    "romanian": "Romania",
    "russian": "Russia",
    "rwandan": "Rwanda",
    "saba": "Saba",
    "saban": "Saba",
    "sahraouian": "Western Sahara",
    "sahrawi": "Western Sahara",
    "sahrawian": "Western Sahara",
    "salvadoran": "El Salvador",
    "sammarinese": "San Marino",
    "samoan": "Samoa",
    "saudi arabian": "Saudi Arabia",
    "saudi": "Saudi Arabia",
    "scottish": "Scotland",
    "senegalese": "Senegal",
    "serbian": "Serbia",
    "seychellois": "Seychelles",
    "sierra leonean": "Sierra Leone",
    "singapore": "Singapore",
    "singaporean": "Singapore",
    "slovak": "Slovakia",
    "slovene": "Slovenia",
    "slovenian": "Slovenia",
    "solomon island": "Solomon Islands",
    "somali": "Somalia",
    "somalilander": "Somaliland",
    "south african": "South Africa",
    "south georgia island": "South Georgia and the South Sandwich Islands",
    "south ossetian": "South Ossetia",
    "south sandwich island": "South Georgia and the South Sandwich Islands",
    "south sudanese": "South Sudan",
    "spanish": "Spain",
    "sri lankan": "Sri Lanka",
    "sudanese": "Sudan",
    "surinamese": "Suriname",
    "svalbard resident": "Svalbard",
    "swati": "Eswatini",
    "swazi": "Eswatini",
    "swedish": "Sweden",
    "swiss": "Switzerland",
    "syrian": "Syrian Arab Republic",
    "taiwanese": "Taiwan",
    "tajikistani": "Tajikistan",
    "tanzanian": "Tanzania",
    "thai": "Thailand",
    "timorese": "Timor-Leste",
    "tobagonian": "Trinidad and Tobago",
    "togolese": "Togo",
    "tokelauan": "Tokelau",
    "tongan": "Tonga",
    "trinidadian": "Trinidad and Tobago",
    "tunisian": "Tunisia",
    "turkish": "Turkey",
    "turkmen": "Turkmenistan",
    "turks and caicos island": "Turks and Caicos Islands",
    "tuvaluan": "Tuvalu",
    "ugandan": "Uganda",
    "ukrainian": "Ukraine",
    "uruguayan": "Uruguay",
    "uzbek": "Uzbekistan",
    "uzbekistani": "Uzbekistan",
    "vanuatuan": "Vanuatu",
    "vatican": "Vatican City State",
    "venezuelan": "Venezuela",
    "vietnamese": "Vietnam",
    "wallis and futuna": "Wallis and Futuna",
    "wallisian": "Wallis and Futuna",
    "welsh": "Wales",
    "yemeni": "Yemen",
    "zambian": "Zambia",
    "zimbabwean": "Zimbabwe",
    "åland island": "Åland Islands",
    # Map every US state (code and name) to the USA
    **{s.lower(): "USA" for s in US_states},
}