Source code for pyrfume.sigma_ff
"""
sigma_ff_catalog.txt produced using pdf2txt.py found in PDFminer
on sigma_ff_catalog.pdf.
"""
import platform
import pyrfume
CATALOG_PATH = pyrfume.DEFAULT_DATA_PATH / "sigma" / "sigma_ff_catalog.txt"
[docs]def get_data():
with open(CATALOG_PATH, "r") as f:
text = f.read()
lines = text.split("\n")
data = {}
organoleptic = 0
for line_num, line in enumerate(lines):
if len(line):
if not organoleptic and line[0] == "[":
key = line.split("]")[0][1:]
if platform.python_version() > "3.0":
key = key.replace("\u2011", "-")
else:
key = key.decode("utf-8").replace("\u2011", "-").encode("ascii")
organoleptic = 1
if organoleptic and "Organoleptic" in line:
try:
value = line.split(":")[1][1:]
if value[-1] in ["-", ","]:
if value[-1] == "-":
value = value[:-1]
else:
value = value + " "
value += lines[line_num + 1]
value = [i.strip() for i in value.split(";") if len(i.strip())]
data[key] = value
organoleptic = 0
except Exception:
pass
print("%d compounds described." % len(data))
descriptors = []
for x in data.values():
descriptors += x
descriptors = list(set(descriptors)) # Remove duplicates.
print("%d descriptors used." % len(descriptors))
return descriptors, data
if __name__ == "__main__":
descriptors, data = get_data()