Skip to content
avr 22 / David Regnier

Extraction et « reporting » via ODBC sous Python

Extraction de données depuis Sybase et « reporting » via pyodbc

Prérequis:

Voici un exemple de script qui va ouvrir une connexion via « pyodbc » sous Windows à Sybase et produire une extraction csv simple.
Tout d’abord il faut préparer les librairies à importer:

...
...
# Import lib
import os
import csv
import sys
import pyodbc
import textwrap
import traceback

# Lib for config variable
try:
    from lib.config.configobj import ConfigObj
    obj_config = ConfigObj('config/config.cfg')
except ImportError:
    ConfigObj = None
    sys.stderr.write('ERROR: %s' % traceback.format_exc())
    sys.exit()

# Lib for helpers
try:
    from lib.helper.helper_misc import LogStats, ToolsBox, ToolsValidator
except ImportError:
    LogStats = None
    ToolsBox = None
    sys.stderr.write('ERROR: %s' % traceback.format_exc())
    sys.exit()
...

Contenu de la classe « LogStats », cette classe va permettre les logs sur la console et un fichier plat en même temps:

...
...
"""Begin LogStats class"""
class LogStats(object):
    def __init__(self, output_file=os.getcwd()+'\\log\\'+'stats.log'):
        """
        Description: Constructor, init stats log file
        Call sample: sys.stdout = LogStats()
        @param output_file: Path to stats log file
        @return: void
        """
        self.console = sys.stdout
        self.file = open(output_file, 'w')

    def write(self, flow):
        """
        Description: Write flow to console and file
        @param flow: String to write
        @return: int
        """
        try:
            if flow <> '':
                self.console.write(flow)
                self.file.write(flow)
            return 0
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()
"""End LogStats class"""
...

Voici le contenu des classes ToolsBox et ToolsValidator utilisées dans ce script:

...
...
"""Begin ToolsBox class"""
class ToolsBox(object):
    @staticmethod
    def set_type(item):
        """
        Description: Set type for CSV from Sybase SGBD
        Call sample: ToolsBox.set_type('FRA')
        @param item: Value from Sybase to cast
        @return: string
        """
        try:
            if type(item) <> str:
                if not item:
                    return 'NULL'
                else:
                    return str(item)
            return item.lstrip().rstrip()
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()     

    @staticmethod
    def set_log(level, key, value, extra_value=''):
        """
        Description: Return log string
        Call sample: ToolsBox.set_log('ERROR', 'label_section', 'notice_starting_module', 'optional free text')
        @param level: Level of log, ERROR, INFO ...
        @param key: Config key from config file
        @param value: Config value
        @param extra_value: Extra free text value if needed
        @return: string
        """
        try:
            if level and key and value:
                return time.strftime('%Y/%m/%d - %H:%M:%S') + ' | ' + level + ': %s' % obj_config[key][value] + extra_value
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()    

    @staticmethod
    def create_sys_dsn(driver, **kwargs):
        """
        Description: Create a system DSN
        @param driver: ODBC driver name
        @param kwargs: Driver attributes
        @return: int(0 - DSN not created / 1 - DSN created)
        """
        try:
            null = chr(0)
            attributes = []
            for item in kwargs.keys():
                attributes.append("%s=%s" % (item, kwargs[item]))
            return ctypes.windll.ODBCCP32.SQLConfigDataSource(0, ODBC_ADD_SYS_DSN, driver, null.join(attributes))
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()

    @staticmethod
    def init_error_log():
        """
        Description: Init error log file
        Call sample: ToolsBox.init_error_log()
        @return: int
        """
        try:
            if not os.path.exists('log'):
                os.makedirs('log')
            file_socket_error = open(os.getcwd() + '\\log\\' + 'error.log', 'w')
            sys.stderr = file_socket_error
            return 0
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()

    @staticmethod
    def init_folder_tree(*kwargs):
        """
        Description: Create folder tree for module
        Call sample: ToolsBox.init_folder_tree(folder1, folder2, ...)
        @param *kwargs: folder1, folder2, ...
        @return: int
        """
        try:
            for element in kwargs:
                if not os.path.exists(element):
                    os.mkdir(element)
            return 0
        except Exception:
            sys.stderr.write('ERROR: %s' % traceback.format_exc())
            sys.exit()
"""End ToolsBox class"""    

"""Begin ToolsValidator class"""
class ToolsValidator(object):
    # Description: Constructor
    #              Init validator
    # @param self: Constructor parameter
    # @param from_date: From date
    # @return: void
    def __init__(self, from_date=''):
        self.from_date = from_date

    # Description: DATE validation
    #
    # @param self: Constructor parameter
    # @return: int
    def valid_date(self):
        try:
            if not self.from_date:
                raise Exception('%s' % 'DATE cannot be empty')
            elif len(self.from_date) <> 10:
                raise Exception('%s' % 'Wrong DATE length' + ' => ' + self.from_date)
            elif not self.from_date.replace('-', '').isdigit():
                raise Exception('%s' % 'Wrong DATE type, should be only numeric => ' + self.from_date)
            try:
                datetime.strptime(self.from_date, '%Y-%m-%d')
            except ValueError:
                raise ValueError('%s' % 'Incorrect DATE format, should be YYYY-MM-DD, or wrong DATE => ' + self.from_date)
            return 0
        except Exception:
            sys.stderr.write('ERROR: %s\n' % traceback.format_exc())
            sys.exit()
"""End ToolsValidator class"""
...

Enfin, voici le script principal qui va produire le rapport, le programme va créer un DSN sous Windows (pour la connexion à Sybase) si il n’existe pas déjà:

...
...
def exec_module(navire_deb_val_dt):
    """
    Description: Execute Python module
    - Validate input date
    - Config object
    - DSN if not exist
    - Folder tree
    - Log files (stats, error)
    - Build vessels report
    @return void
    """
    try:
        # Pre-check, date must be valid
        if not navire_deb_val_dt:
            navire_deb_val_dt = '1989-01-01' # Min date
        validator = ToolsValidator(navire_deb_val_dt)
        validator.valid_date()

        # Initialize log files
        ToolsBox.init_error_log() # Error log
        sys.stdout = LogStats() # Stats log

        # Init folder tree for this module
        ToolsBox.init_folder_tree(obj_config['file_section']['output_folder_results'])

        # Set output result file variable
        output_file = obj_config['file_section']['output_folder_results'] + '/' + obj_config['file_section']['output_file_results']

        # SQL script file variable
        sql_file = obj_config['sgbd_section']['sgbd_query_file']

        # Create DSN if not exist
        result = ToolsBox.create_sys_dsn(
            "Adaptive Server Enterprise",
            SERVER = obj_config['sgbd_section']['sgbd_server'],
            PORT = obj_config['sgbd_section']['sgbd_port'],
            DESCRIPTION = obj_config['sgbd_section']['sgbd_dsn_name'],
            DSN = obj_config['sgbd_section']['sgbd_dsn_name'],
            DATABASE = obj_config['sgbd_section']['sgbd_database'],
            UID = obj_config['sgbd_section']['sgbd_uid'],
            PASSWORD = obj_config['sgbd_section']['sgbd_password']
        )
        if result:
            i = 0
            print ToolsBox.set_log('INFO', 'label_section', 'notice_dsn_created_ok')
            print ToolsBox.set_log('INFO', 'label_section', 'notice_query_running')

            # Open ODBC connection
            odbc_connection = pyodbc.connect('DSN=' + obj_config['sgbd_section']['sgbd_dsn_name'])
            cursor = odbc_connection.cursor()

            # CSV read / write process
            file_handle = open(output_file, 'wb')
            obj_writer = csv.writer(file_handle, delimiter = ';')

            # Open, read SQL script and replace navire_deb_val_dt date if any
            sql_file_handle = open(sql_file, 'r').read().replace('YYYY-MM-DD', navire_deb_val_dt)

            # Execute SQL statement
            cursor.execute(sql_file_handle)
            rows = cursor.fetchall()

            # Write lines from SQL
            dictionary = {}

            for line in rows:
                # Iterate, feed dictionary, remove duplicate on the whole line
                dictionary[line[1]] = ToolsBox.set_type(line[0]) + '_' + ToolsBox.set_type(line[2]) + '_' + ToolsBox.set_type(line[3]) + '_' + ToolsBox.set_type(line[4]) + '_' + ToolsBox.set_type(line[5]) + '_' + ToolsBox.set_type(line[6]) + '_' + ToolsBox.set_type(line[7]) + '_' + ToolsBox.set_type(line[8]) + '_' + ToolsBox.set_type(line[9])+ '_' + ToolsBox.set_type(line[10]) + '_' + ToolsBox.set_type(line[11])+ '_' + ToolsBox.set_type(line[12]) + '_' + ToolsBox.set_type(line[13]) + '_' + ToolsBox.set_type(line[14])+ '_' + ToolsBox.set_type(line[15])

            # Iterate through sorted dictionary
            obj_writer.writerow(['header_column_01', 'header_column_02', ...]) # You have to change this if you need header into your CSV output file
            for key in dictionary:
                row = dictionary[key].split('_')
                i += 1
                obj_writer.writerow([row[0], key, row[1], row[2], row[3], row[4], row[5], row[6], row[7], row[8], row[9], row[10], row[11], row[12], row[13], row[14]])

            # Close files handle
            file_handle.close() 

            # Check if result is not empty
            if i <> 0:
                print ToolsBox.set_log('INFO', 'label_section', 'notice_report_done_ok', ' ' + output_file)
                print ToolsBox.set_log('INFO', 'label_section', 'notice_nb_lines', ' ' + '(' + str(i) + ')')
            else:
                print ToolsBox.set_log('INFO', 'label_section', 'notice_empty_file')
                os.remove(output_file) # Remove empty file
            sys.exit()
        else:
            print ToolsBox.set_log('ERROR', 'label_section', 'error_dsn_created_ko')
    except Exception:
        sys.stderr.write('ERROR: %s' % traceback.format_exc())
        sys.exit()

def main():
    """
    Description: Main entry point for this module
        - Build report for vessels
    @return void
    """
    try:
        # Display menu for user
        while 1:
            sys.stdout.write(
                textwrap.dedent("""\
                Valid commands are:
                -------------------------------------------
                [E] to exit
                [get_vessels_report] [YYYY-MM-DD] will produce vessels report

                Help:
                    If you avoid [YYYY-MM-DD], this will produce all vessels from the beginning
                """)
            )

            # Read input
            oCommandLine = raw_input()
            if oCommandLine == 'E':
                break
            elif 'get_vessels_report' in oCommandLine:
                exec_module(oCommandLine[19:29])
            else:
                print 'Unknown command'
                sys.exit()

    except Exception:
        sys.stderr.write('ERROR: %s' % traceback.format_exc())
        sys.exit()        

if __name__ == '__main__':
    main()
...

Voici également mon fichier de configuration pour exemple (j’utilise la librairie: ConfigObj):


[file_section]
output_folder_results = ResultFolder
output_file_results = ReportFileName.csv

[label_section]
error_dsn_created_ko = DSN not created => KO
notice_empty_file = No result for this request
notice_dsn_created_ok = DSN created successfully => OK
notice_report_done_ok = Report has been done successfully:
notice_query_running = The SQL statement is running please wait...
notice_nb_lines = line numbers

[sgbd_section]
sgbd_uid = your_login
sgbd_password = your_password
sgbd_server = your_server
sgbd_dsn_name = PRODUCTION_SYBASE_DSN
sgbd_port = your_port_number
sgbd_database = your_database
sgbd_query_file = config/query.sql

Et pour finir un exemple de requête SQL avec une date pour le remplacement à partir du menu utilisateur Python, c’est votre source de données:


SELECT
T.FIELD1,
E.FIELD2,
...
FROM
DB..T_TABLE1 T
LEFT JOIN DB..T_TABLE2 E
ON T.PK = E.FK
...
WHERE
...
AND T.FIELD_DATE >= 'YYYY-MM-DD'
...

Ce script va ouvrir et exécuter une requête SQL (pour cet exemple => SGBD Sybase), donc vous devez créer votre propre requête, ce qui est donné dans le script SQL query.sql est arbitraire….
Laisser un commentaire


8 + = 9