Module embeddingsprep.preprocessing.utils.readers

Readers

Contains utility functions to convert the arguments parsed by docopt (which are not always returned in the right type), to check whether files and directories exist, to replace numeric strings with standardized tokens, etc.

Expand source code
"""
# Readers

Contains utility functions to convert the arguments parsed by docopt (which are not always returned in the right type), to check whether files and directories exist, to replace numeric strings with standardized tokens, etc.
"""
import os
import logging

logger = logging.getLogger("preprocessor")


def readArgs(args):
    """Reads and corrects docopt parsed arguments from the command-line
    interface. Values given on the command line come back as strings, so
    ints, floats and booleans need to be converted back to their type.

    Keys are stripped of the characters '<', '>' and '-' (for instance
    '--min-count' becomes 'mincount'). Values that are not strings (flags
    already parsed as booleans, counters, lists, None) are kept unchanged.
    Strings are tried as int, then as float, then as 'True'/'False', and are
    kept as they are when none of these conversions applies.

    Args:
        args : dic
            docopt generated argument dictionary
    Returns:
        params : dic
            the dictionary of corrected arguments
    """
    params = {}
    for key, value in args.items():
        name = key.replace("<", "").replace(">", "").replace("-", "")
        if not isinstance(value, str):
            # Already typed: converting here would turn True into 1 and
            # truncate floats, so the value is passed through untouched.
            params[name] = value
            continue
        try:
            params[name] = int(value)
            continue
        except ValueError:
            pass
        try:
            params[name] = float(value)
            continue
        except ValueError:
            pass
        try:
            params[name] = str2bool(value)
        except ValueError:
            # Not a number nor a boolean: keep the raw string.
            params[name] = value
    return params


def str2bool(s):
    """Helps to convert a string representing a boolean into the boolean.
    Only the exact strings 'True' and 'False' are recognized.
    Args:
        s : str
    Returns:
        b : boolean
    Raises:
        ValueError : if s is neither 'True' nor 'False'
    """
    if s == "True":
        return True
    if s == "False":
        return False
    # Name the offending value so the failure can be diagnosed.
    raise ValueError("Cannot convert {!r} to a boolean".format(s))


def checkExistenceDir(path):
    """Checks if a given path is the path of a directory, and if not, creates
    the directory (together with any missing parent directory) after logging
    a warning.
    Args:
        path : str
            A string representing a path
    """
    path = os.path.abspath(path)
    if not os.path.isdir(path):
        logger.warning(
            "Directory %s does not seem to exist, creating one.", path
        )
        # makedirs also creates missing parents, and exist_ok avoids a crash
        # if the directory appears between the check above and this call.
        os.makedirs(path, exist_ok=True)


def checkExistenceFile(path):
    """Tells whether a given path is the path of an existing file.
    Args:
        path : str
            A string representing a path (made absolute before the check)
    Returns:
        exists : boolean
            True if the path is an existing file, False otherwise
    """
    return os.path.isfile(os.path.abspath(path))


def openFile(filepath):
    """Reads a UTF-8 text file and returns its content as a single string,
    the lines being joined by a space instead of a newline.
    Args:
        filepath : str
            A string representing the path of the file to read
    Returns:
        text : str
            The text string
    Raises:
        AssertionError : if filepath is not the path of an existing file
    """
    # Explicit check rather than an `assert` statement, which is removed when
    # Python runs with -O. AssertionError is kept for existing callers.
    if not os.path.isfile(os.path.abspath(filepath)):
        raise AssertionError("filepath does not exist")
    with open(filepath, "r", encoding="utf-8") as f:
        return " ".join(line.rstrip("\n") for line in f)


def convertInt(s):
    """Tells if a string can be converted to int and, if so, replaces it
    with a standardized token.
    Args:
        s : str
    Returns:
        s : str
            Standardized token 'INT' if s can be turned to an int, s otherwise
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not an int"; anything else (e.g. a
        # keyboard interrupt) must not be silently swallowed.
        return s
    return "INT"


def convertFloat(s):
    """Tells if a string can be converted to float and, if so, replaces it
    with a standardized token.
    Args:
        s : str
    Returns:
        s : str
            Standardized token 'FLOAT' if s can be turned to a float, s
            otherwise
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a float"; anything else (e.g. a
        # keyboard interrupt) must not be silently swallowed.
        return s
    return "FLOAT"

Functions

def checkExistenceDir(path)

Checks if a given path is the path of a directory, and if not, creates the directory.

Args

path : str
A string representing a path
Expand source code
def checkExistenceDir(path):
    """Checks if a given path is the path of a directory, and if not, creates
    the directory (together with any missing parent directory) after logging
    a warning.
    Args:
        path : str
            A string representing a path
    """
    path = os.path.abspath(path)
    if not os.path.isdir(path):
        logger.warning(
            "Directory %s does not seem to exist, creating one.", path
        )
        # makedirs also creates missing parents, and exist_ok avoids a crash
        # if the directory appears between the check above and this call.
        os.makedirs(path, exist_ok=True)
def checkExistenceFile(path)

Checks if a given path is the path of an existing file.

Args

path : str
A string representing a path
Expand source code
def checkExistenceFile(path):
    """Tells whether a given path is the path of an existing file.
    Args:
        path : str
            A string representing a path (made absolute before the check)
    Returns:
        exists : boolean
            True if the path is an existing file, False otherwise
    """
    return os.path.isfile(os.path.abspath(path))
def convertFloat(s)

Replaces a string that can be converted to a float with the standardized token 'FLOAT'; any other string is returned unchanged.

Args

s : str
 

Returns

s : str
Standardized token 'FLOAT' if s can be turned to a float, s otherwise
Expand source code
def convertFloat(s):
    """Tells if a string can be converted to float and, if so, replaces it
    with a standardized token.
    Args:
        s : str
    Returns:
        s : str
            Standardized token 'FLOAT' if s can be turned to a float, s
            otherwise
    """
    try:
        float(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a float"; anything else (e.g. a
        # keyboard interrupt) must not be silently swallowed.
        return s
    return "FLOAT"
def convertInt(s)

Replaces a string that can be converted to an int with the standardized token 'INT'; any other string is returned unchanged.

Args

s : str
 

Returns

s : str
Standardized token 'INT' if s can be turned to an int, s otherwise
Expand source code
def convertInt(s):
    """Tells if a string can be converted to int and, if so, replaces it
    with a standardized token.
    Args:
        s : str
    Returns:
        s : str
            Standardized token 'INT' if s can be turned to an int, s otherwise
    """
    try:
        int(s)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not an int"; anything else (e.g. a
        # keyboard interrupt) must not be silently swallowed.
        return s
    return "INT"
def openFile(filepath)

Read lines of a txt file in 'filepath' and returns a string

Args

filepath : str
A string representing a path

Returns

text : str
The text string
Expand source code
def openFile(filepath):
    """Reads a UTF-8 text file and returns its content as a single string,
    the lines being joined by a space instead of a newline.
    Args:
        filepath : str
            A string representing the path of the file to read
    Returns:
        text : str
            The text string
    Raises:
        AssertionError : if filepath is not the path of an existing file
    """
    # Explicit check rather than an `assert` statement, which is removed when
    # Python runs with -O. AssertionError is kept for existing callers.
    if not os.path.isfile(os.path.abspath(filepath)):
        raise AssertionError("filepath does not exist")
    with open(filepath, "r", encoding="utf-8") as f:
        return " ".join(line.rstrip("\n") for line in f)
def readArgs(args)

Reads and corrects the arguments parsed by docopt from the command-line interface. For instance, ints, floats and booleans need to be converted back to their type.

Args

args : dic
docopt generated argument dictionary

Returns

params : dic
the dictionary of corrected arguments
Expand source code
def readArgs(args):
    """Reads and corrects docopt parsed arguments from the command-line
    interface. Values given on the command line come back as strings, so
    ints, floats and booleans need to be converted back to their type.

    Keys are stripped of the characters '<', '>' and '-' (for instance
    '--min-count' becomes 'mincount'). Values that are not strings (flags
    already parsed as booleans, counters, lists, None) are kept unchanged.
    Strings are tried as int, then as float, then as 'True'/'False', and are
    kept as they are when none of these conversions applies.

    Args:
        args : dic
            docopt generated argument dictionary
    Returns:
        params : dic
            the dictionary of corrected arguments
    """
    params = {}
    for key, value in args.items():
        name = key.replace("<", "").replace(">", "").replace("-", "")
        if not isinstance(value, str):
            # Already typed: converting here would turn True into 1 and
            # truncate floats, so the value is passed through untouched.
            params[name] = value
            continue
        try:
            params[name] = int(value)
            continue
        except ValueError:
            pass
        try:
            params[name] = float(value)
            continue
        except ValueError:
            pass
        try:
            params[name] = str2bool(value)
        except ValueError:
            # Not a number nor a boolean: keep the raw string.
            params[name] = value
    return params
def str2bool(s)

Helps to convert a string representing a boolean into the boolean

Args

s : str
 

Returns

b : boolean
 
Expand source code
def str2bool(s):
    """Helps to convert a string representing a boolean into the boolean.
    Only the exact strings 'True' and 'False' are recognized.
    Args:
        s : str
    Returns:
        b : boolean
    Raises:
        ValueError : if s is neither 'True' nor 'False'
    """
    if s == "True":
        return True
    if s == "False":
        return False
    # Name the offending value so the failure can be diagnosed.
    raise ValueError("Cannot convert {!r} to a boolean".format(s))