Everyone knows the power of Python in data processing, data analysis, and machine learning. So how can Python be used to implement web-based HTTP applications and services? There are many options: frameworks such as Gradio and Streamlit for web applications, FastAPI for web services, and so on (see the earlier articles on large-model applications for details). This article uses the Tornado framework to implement an HTTP web service, combining it with two natural-language-processing (NLP) needs — word segmentation and entity recognition — to show how to expose them as HTTP services. Details as follows:
Operating environment: Python 3.10, with the third-party tornado and jieba packages plus the standard-library time and logging modules.
Run command: python httpServer_nlp.py
Calling method: http://localhost:8082/cutsegment?content=Study on three aspects of research and implementation of semantic analysis and extraction methods of oil and gas exploration and development documents&search_type=1
Recognition results (word segmentation): {"cut": ["Oil and gas exploration", "Development", "Document", "Semantics", "Analysis", "extraction", "method", "research", "three", "research"], "entities": [], "returncode": 0, "message": "ok", "runtime": 0.3878319263458252}
Recognition results (entities): {"cut": [], "entities": ["Method", "Semantics", "Analysis", "extract", "implementation", "documentation", "aspect"], "returncode": 0, "message": "ok", "runtime": 0.4005763530731201}
import sys
import os
import time
import json
import re
import logging
from collections import OrderedDict

import tornado.httpserver
from tornado.options import define, options

import jieba
# BUG FIX: jieba's keyword-extraction module is spelled "analyse", not
# "analyze" — `from jieba import analyze` raises ImportError.
from jieba import analyse

# --- Logging setup -----------------------------------------------------------
logger = logging.getLogger()      # root logger
logger.setLevel(logging.INFO)     # global log output level

# Make sure the log directory exists before attaching the FileHandler,
# otherwise logging.FileHandler raises FileNotFoundError at startup.
os.makedirs('./log', exist_ok=True)

# BUG FIX: the original mode string 'a + ' (with spaces) is not a valid
# file mode and makes FileHandler raise ValueError. Plain append is intended.
fileHandler = logging.FileHandler(filename='./log/service.log', mode='a',
                                  encoding='utf-8', delay=False)
format_option = ('%(asctime)s - %(filename)s[line:%(lineno)d] - '
                 '%(threadName)s - %(levelname)s: %(message)s')
fileHandler.setFormatter(logging.Formatter(format_option))
logger.addHandler(fileHandler)

# HTTP port; overridable on the command line as --port=NNNN.
define("port", default=8082, help="--port", type=int)


def get_kg_result_0(text):
    """Segment *text* with jieba and drop stop words.

    Dates are pulled out of the text before segmentation (so jieba does not
    split them) and appended verbatim to the returned token list.
    """
    jieba.load_userdict("./data/StopWord/user_dict.txt")  # custom dictionary
    stwlist = get_stop_words()
    text, the_dates = get_date(text)  # protect whole dates from the segmenter
    tokens = remove_special_tokens(jieba.cut(text, cut_all=False), stwlist)
    print('\n1. Loaded custom segmentation dictionary: ' + "/ ".join(tokens))
    return tokens + the_dates


def get_entity_0(text):
    """Extract keywords/entities from *text* via TextRank.

    Returns keywords ordered by weight, highest first.
    """
    print('Extracting top keywords' + '//')
    keywords = analyse.textrank(text)
    print(keywords)
    return keywords


def get_stop_words(path=r'./data/StopWord/NLPIR_stopwords.txt'):
    """Load the stop-word file (one word per line) into a set.

    BUG FIX: the original split the file content on a backslash-space
    sequence, which left the entire file as one giant "stop word" and made
    stop-word removal a no-op; the file is one word per line, so split on
    line boundaries. Also use a with-statement so the handle is closed.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return set(f.read().splitlines())
def remove_special_tokens(words, stwlist):
    """Filter stop words, single characters, and blank tokens from *words*.

    The original popped items while walking the list backwards; building a
    new list with a comprehension is equivalent and clearer.
    """
    stop_words = set(stwlist)
    return [w for w in words if w not in stop_words and len(w) > 1]


def get_date(content):
    """Extract date substrings from *content* before segmentation.

    Each date is replaced in the text by a rarely used placeholder character
    so the segmenter treats it as opaque; the extracted dates are returned
    alongside the modified text as ``(content, dates)``.

    BUG FIX: the original pattern contained ``\\d{ 1,2}`` — the space inside
    the braces makes ``{ 1,2}`` match literally, so the second alternative
    never matched real dates.

    NOTE(review): the literal words "year"/"month"/"day" look like machine-
    translated CJK date markers from the original source — confirm against
    real input data.
    """
    pattern = (r'\d{4}-\d{1,2}-\d{1,2}'
               r'|\d{4}year\d{1,2}month\d{1,2}day'
               r'|\d{4}/\d{1,2}/\d{1,2}')
    dates = re.findall(pattern, content)
    for item in dates:
        # Replace the date with an uncommon character as a placeholder.
        content = content.replace(item, "祥")
    return content, dates


class MainGetHandler(tornado.web.RequestHandler):
    """Handler for /cutsegment: word segmentation or entity extraction."""

    def recog(self, mode="get"):
        """Shared worker supporting both GET and POST requests.

        Writes a JSON body: {"cut": [...], "entities": [...],
        "returncode": int, "message": str, "runtime": float}.
        """
        if mode == "get":
            sub = self.get_argument("content", None)
            search_type = int(self.get_argument("search_type", 0))
            uid = self.get_argument("uuid", "000000")
        else:
            # POST: parameters arrive as a JSON request body.
            data = json.loads(self.request.body.decode())
            sub = data.get("content")
            search_type = int(data.get("search_type", 0))
            uid = data.get("uuid", "000000")

        result = OrderedDict()
        returncode = 0
        message = "ok"
        output = {}
        entity = {}
        start = time.time()

        if search_type == 0 or search_type > 4:
            returncode = 10000
            message = "search_type is error"
        if sub is None:
            # BUG FIX: the original also tested `rel` and `obj`, names that
            # are never defined in this method — any request without
            # `content` raised NameError instead of returning 10001.
            returncode = 10001
            message = "data is null"

        if search_type == 1:
            # Word segmentation; content must be non-empty.
            try:
                if sub is None or sub in ["", " "]:
                    returncode = 10002
                    message = "when search_type is 1, content not null"
                else:
                    output = get_kg_result_0(sub)
                    entity = []
            except Exception as e:
                # BUG FIX: the format string has two placeholders but the
                # original passed only one argument, raising IndexError
                # inside the handler and masking the real exception.
                logger.info("{},error: {}".format(output, e))
                returncode = 10002
                message = "service error"
        elif search_type == 2:
            # Entity extraction via TextRank; content must be non-empty.
            try:
                if sub is None or sub in ["", " "]:
                    returncode = 10003
                    message = "when search_type is 2, content and rel not null"
                else:
                    output = []
                    entity = get_entity_0(sub)
            except Exception as e:
                logger.info("{},error: {}".format(entity, e))
                returncode = 10003
                message = "service error"

        detal = time.time() - start
        result["cut"] = output
        result["entities"] = entity  # entity-recognition result
        result["returncode"] = returncode
        result["message"] = message
        result["runtime"] = detal
        logger.info("result:{}".format(result))  # local log
        self.write(json.dumps(result, ensure_ascii=False))
        self.finish()

    def get(self):
        """GET entry point."""
        self.recog(mode="get")

    def post(self):
        """POST entry point."""
        self.recog(mode="post")


# Main program: start the Tornado HTTP server.
if __name__ == "__main__":
    print("Server is listening, Port:" + str(options.port) + " ...")
    sys.path.append("../")  # make the parent directory importable
    tornado.options.parse_command_line()
    # URL rules need to stay consistent with the nginx configuration.
    application = tornado.web.Application([(r"/cutsegment", MainGetHandler)])
    http_server = tornado.httpserver.HTTPServer(application)
    http_server.listen(options.port)
    tornado.ioloop.IOLoop.instance().start()
The knowledge points of this article match the official knowledge files; consult those files to learn more about the related topics.