Maintain the table structure of postgresql-15 to neo4j-5.8.1 based on python3.11

Question background

  • The interdependence of database tables is complicated, and I don’t know how to track the reconstruction
  • Some businesses need to be completed through the graph database, and the technology is implemented through the database table
  • Familiarize yourself with the postgresql database
  • Experience the new features of some toolkits of higher versions of python
  • Learn about some new features of neo4j

Implement logic

Read configuration file tool class

import yaml
import os

os.chdir(os.path.dirname(__file__))
# Read the configuration file of the service
def get_config(yaml_file="service.yaml") -> dict:
    with open(yaml_file, 'r') as file:
        data = file. read()
        result = yaml.load(data, Loader=yaml.FullLoader)
        return result

The account information of the database table is defined in the service.yaml file:

demo:
  db:
    postgresql:
      user: the account you set
      password: the password you set
      port: the port you set
      host: your postgresql server address
      db: postgres
    neo4j:
        user: user name of neo4j
        password: neo4j login password
        uri: address of neo4j

postgresql database connection tool class

import os

from sqlalchemy import create_engine
from sqlalchemy.orm import Session, sessionmaker

from common import ConfigHelper

class PostgresqlHelper(object):
    def __init__(self):
        self.db_type = "postgresql"
        self.config = ConfigHelper.get_config()['demo']

    def get_config(self):
        return self.config

    # Get the database connection engine
    def get_db_engine(self):
        db_json = self.config["db"][self.db_type]
        engine = create_engine(
            'postgresql://{}:{}@{}:{}/{}'.format(db_json['user'], db_json['password'],
                                                 db_json['host'], db_json['port'],
                                                 db_json['db']), echo=False)
        return engine

    def get_db_connect(self):
        return self. get_db_engine(). connect()

    # get session session
    def get_session(self) -> Session:
        db_session = sessionmaker(bind=self.get_db_engine(), future=True)
        return db_session()

The above tool class completes the connection to the database through the orm toolkit of sqlalchemy, which facilitates the operation of the database table later. If you still don’t know how to install the postgresql database, you can check the postgresql-15 installation based on the Centos7 system (simple operation), which is easy to operate.

tool class for neo4j graph database operation

from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher

from common import ConfigHelper


class Neo4jHelper(object):
    """
    Realize the Neo4j database connection function, and the specific implementation is chosen by the implementer.
    Implementations should avoid using connection libraries in external libraries as much as possible, and use other methods (such as executing SQL queries, connecting to external libraries, etc.) for external libraries.
    These methods are responsible for creating and using connections, not at the discretion of the implementor.
    """

    def __init__(self):
        config_json = ConfigHelper.get_config()['demo']["db"]["neo4j"]
        self.graph = Graph(config_json["uri"], auth=(config_json["user"], config_json["password"]))
        self. node_matcher = NodeMatcher(self. graph)
        self. relationship_matcher = RelationshipMatcher(self. graph)

    def create_node(self, label, name, data):
        """
        Create a node with the specified label and data
        :param label: the type of node to be created
        :param name: the name of the node to be created
        :param data: the data of the node to be created (not including name, which is the name of the node)
        :return:
        """
        node = Node(label, name=name, **data)
        self.graph.create(node)
        return node

    def create_rel(self, start_node, end_node, type_, data):
        """
        Creates a relation, with the specified type and data
        :param start_node: The starting node of the relationship to be created
        :param end_node: the end node of the relationship to be created
        :param type_: the type of relationship to create
        :param data: the data of the relationship to be created
        :return:
        """
        return self.graph.create(Relationship(start_node, type_, end_node, **data))

    def find_node(self, label: str, properties: dict):
        """
        Find a node with data of the specified type and condition
        :param label: the type of node to look for
        :param properties: The conditional data of the node to find
        :return:
        """
        nodes = list(self. node_matcher. match(label, **properties))
        return nodes

    def find_relationship(self, rel_type, properties):
        """
        Finds a relation with data of the specified type and condition
        :param rel_type: the type of relationship to look for
        :param properties: conditional data for the relationship to look for
        :return:
        """
        relationships = list(self.relationship_matcher.match(r_type=rel_type, **properties))
        return relationships

    def update_node(self, node, **properties):
        """
        Update a node with data of the specified type and condition
        :param node: The node to update
        :param properties: Conditional data of the node to update
        :return:
        """
        for key, value in properties.items():
            node[key] = value
        self.graph.push(node)

    def delete_node(self, node):
        """
        delete a node
        :param node: the node to delete
        :return:
        """
        incoming_rels = self.relationship_matcher.match(nodes=[node], r_type=None)
        outgoing_rels = self.relationship_matcher.match(nodes=None, r_type=None, end_node=node)

        for rel in incoming_rels:
            self.graph.separate(rel)
        for rel in outgoing_rels:
            self.graph.separate(rel)

        # Delete the node
        self.graph.delete(node)

    def delete_relationship(self, relationship):
        """
        delete a relation
        :param relationship: the relationship to delete
        :return:
        """
        self.graph.separate(relationship)

The above code implements the addition, deletion, modification and query logic of the graph database through the py2neo toolkit. Of course, if you are not familiar with the installation of the neo4j database, or the installation is not smooth, you can refer to the online installation of neo4j-5.6.0 on Centos7, which is easy to understand.

Business logic implementation

import json

from sqlalchemy import inspect

from common.Neo4jHelper import Neo4jHelper
from common.PostgresqlHelper import PostgresqlHelper

if __name__ == '__main__':
    db = PostgresqlHelper()
    eng = db.get_db_engine()
    inspector = inspect(eng)
    # Tables that need to establish a graph relationship (curriculum table, score table, student table)
    table_name_list = ["course", "score", "user"]
    schema_name = "new_browser"
    # Basic information of the table (description)
    data = {<!-- -->}
    for t in table_name_list:
        table_data = {<!-- -->}
        comment = inspector.get_table_comment(table_name=t, schema=schema_name)
        table_data["comment"] = comment["text"]
        columns = inspector.get_columns(table_name=t, schema=schema_name)
        columns_info = {<!-- -->}
        for column in columns:
            columns_info[column['name']] = str(column['type'])
        table_data["columns_info"] = json. dumps(columns_info)
        data[t] = table_data
    app = Neo4jHelper()
    label = "Table"
    node_data = {<!-- -->}
    for t in table_name_list:
        # Create table nodes
        table_comment = data[t]["comment"]
        node_data[t] = app.create_node(name=f"{<!-- -->t}({<!-- -->table_comment})", label=label, data=data[t])
    # create relationship
    app.create_rel(node_data["score"], node_data["user"], "user_name", {<!-- -->"info": "Student's name depends on the student table"})
    app.create_rel(node_data["score"], node_data["course"], "course_name", {<!-- -->"info": "course name depends on course schedule"})

The above code is mainly to obtain the structural information of the table through sqlalchemy, as the node information of the graph database, the table name + table comment as the name of the node, the above three tables in psql (student information table, course information table and score information tables) to create relationships between nodes in the graph database. Because the three tables are all database tables, the label of the node is “Table”, and the node information contains the field information (including the type). You can also save the information you want according to your needs. The results of the operation can be found in the View in Neo4j Desktop:

It can be seen that the user information and course information in the score table correspond to the user table and course table respectively. Click the corresponding node to view the detailed information of the node:

Click the test result table, you can see the corresponding column_info, comment, name information. The entire development project is over, and you can download my complete code. I hope you can communicate more!