代码已上传至GitHub:https://github.com/2NaCl/java_py_TCP
首先说明一下,此项目基于TCP原理,也就是使用Socket实现的。实验背景是这样的,我现在在做JavaWeb前后端开发,但是这个接口的数据需要一定的数据分析,于是我用Python实现了数据分析的部分,但是分析的结果数据,我需要传回JavaWeb后端,然后后端传给前端(这里没有展示),于是就有了如下的代码。
在这里,Java相当于客户端,Python相当于服务端,因为Python端需要一直保持运行,随时等待数据传入并回传结果。因此,我们用Java编写TCP客户端,用Python编写Server端。
此外,Python机器学习所需要的相关数据与模型也一并上传到了GitHub。
# -*- coding: UTF-8 -*-
from socket import *
from time import ctime
import jieba
from jieba import posseg
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, cross_validate
# jieba user-dictionary files.
material_userdict = 'material_userdict.txt'
device_userdict = 'device_userdict.txt'
worker_userdict = 'worker_userdict.txt'

# Question corpus and the pickled model (alternative model file: 'forest').
question_file = 'questions.txt'
model_file = 'forest_test'

# Column names used in the questions DataFrame.
question_row, question_cut_row = 'questions', 'questions_cut'

# Part-of-speech flags that abstract_str compares token flags against.
POS = ['mmm', 'mmd', 'mmw', 'm']

# Address the server socket binds to and the per-recv() byte limit.
HOST, PORT = '', 21567
BUFSIZ = 1024
ADDR = (HOST, PORT)

# Register every user dictionary with jieba, in the same order as listed above.
for _userdict_path in (material_userdict, device_userdict, worker_userdict):
    jieba.load_userdict(_userdict_path)
def cut(string):
    """Segment ``string`` with jieba and return the tokens joined by single spaces."""
    tokens = jieba.cut(string)
    return ' '.join(tokens)
# Load the tab-separated, UTF-8 question corpus and add a column holding the
# space-joined jieba segmentation of every question.
questions = pd.read_csv(question_file, sep='\t', encoding='utf-8')
questions[question_cut_row] = questions[question_row].apply(cut)

import pickle

# SECURITY NOTE: pickle.load() can execute arbitrary code embedded in the
# file, so model_file must only ever come from a trusted source.
with open(model_file, 'rb') as training_model:
    model = pickle.load(training_model)
def abstract_str(test_string):
    """Replace flagged words in ``test_string`` with their part-of-speech flag.

    The text is segmented with ``jieba.posseg``. Each ``word,flag `` pair is
    sent to the connected client through the module-level ``tcpCliSock`` as
    soon as it is produced, and the complete list of pairs is printed.

    Args:
        test_string: Text to segment.

    Returns:
        A ``(fin_str, variable)`` tuple. ``fin_str`` is the concatenation of
        all tokens, where every word flagged ``POS[0]``, ``POS[1]`` or
        ``POS[2]`` is replaced by that flag. ``variable`` is the last word
        flagged ``POS[0]`` or ``POS[1]``, or ``''`` when there is none.
    """
    pairs = []
    for token in jieba.posseg.cut(test_string):
        word, flag = str(token.word), str(token.flag)
        pairs.append((word, flag))
        # sendall() keeps writing until the whole payload has been handed to
        # the OS; a bare send() may transmit only part of it.
        tcpCliSock.sendall((word + ',' + flag + " ").encode())
    print(pairs)

    pieces = []
    variable = ''
    for word, flag in pairs:
        if flag == POS[0] or flag == POS[1]:
            pieces.append(flag)
            variable = word
        elif flag == POS[2]:
            # NOTE(review): a POS[2] word is replaced by its flag but is not
            # recorded in ``variable`` -- confirm this is intended.
            pieces.append(flag)
        else:
            pieces.append(word)
    return ''.join(pieces), variable
def vectorized_str(test_string):
    """Segment ``test_string`` with ``cut`` and return its feature array.

    The segmented text is passed as a one-element list to the module-level
    ``vectorizer`` and the transformed result is returned via ``toarray()``.
    """
    segmented = cut(test_string)
    return vectorizer.transform([segmented]).toarray()
# Fit the bag-of-words vocabulary on the segmented question corpus so that
# vectorized_str() can transform incoming text with the same vocabulary.
vectorizer = CountVectorizer()
data_features = vectorizer.fit_transform(questions[question_cut_row]).toarray()

# Listening socket; SO_REUSEADDR lets the server rebind right after a restart.
tcpSerSock = socket(AF_INET, SOCK_STREAM)
tcpSerSock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
tcpSerSock.bind(ADDR)
tcpSerSock.listen(5)
try:
    # Serve one client at a time, forever.
    while True:
        print('把客户端启动一下啊')
        # tcpCliSock must stay a module-level name: abstract_str() writes to it.
        tcpCliSock, addr = tcpSerSock.accept()
        print('ip:', addr)
        try:
            while True:
                # NOTE: there is no message framing; each recv() chunk of up
                # to BUFSIZ bytes is treated as one complete UTF-8 message.
                data = tcpCliSock.recv(BUFSIZ).decode()
                if not data:
                    # Empty read: the client closed the connection.
                    break
                print('get client data:%s\n[%s]' % (data, ctime()))
                a_s, variable = abstract_str(data)
                v_s = vectorized_str(a_s)
                result = model.predict(v_s)
                # Wire format: prediction, a newline, then the extracted word.
                result = str(result) + '\n' + str(variable)
                # sendall() retries until every byte is written, unlike send().
                tcpCliSock.sendall(result.encode())
                # 'session_close' is still answered like any other message
                # before the session ends.
                if data == 'session_close':
                    break
        finally:
            # Actually call close(); a bare ``tcpCliSock.close`` is a no-op.
            tcpCliSock.close()
finally:
    # Runs when the accept loop is interrupted (e.g. by Ctrl-C or an error).
    tcpSerSock.close()
package com.techfantasy.qs.socket;
import java.io.*;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Interactive TCP client for the analysis server listening on localhost:21567.
 *
 * <p>Reads one line at a time from standard input, sends it to the server
 * without a trailing newline, then reads one line of the reply, splits it on
 * spaces and prints each piece wrapped in its own single-element list.
 */
public class SocketClient {
    /**
     * Runs the prompt/send/receive loop until standard input ends or the
     * server closes the connection.
     *
     * @param args unused
     * @throws IOException declared for compatibility; I/O failures inside the
     *         loop are caught and their stack trace is printed
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the socket (and its streams) on every exit path.
        try (Socket socket = new Socket("localhost", 21567)) {
            // Wrap the streams once. Creating a new BufferedReader per request
            // can drop bytes that the previous reader had already buffered.
            // UTF-8 is set explicitly because the Python server uses
            // encode()/decode() with their UTF-8 default; the platform default
            // charset may differ.
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(
                    socket.getOutputStream(), java.nio.charset.StandardCharsets.UTF_8));
            BufferedReader in = new BufferedReader(new InputStreamReader(
                    socket.getInputStream(), java.nio.charset.StandardCharsets.UTF_8));
            Scanner instr = new Scanner(System.in);

            while (true) {
                System.out.print("> ");
                if (!instr.hasNextLine()) {
                    break; // standard input is exhausted
                }
                String request = instr.nextLine(); // parameter supplied by the front end
                if (request.isEmpty()) {
                    // Nothing would be written to the socket, so the server
                    // would never answer and readLine() would block forever.
                    continue;
                }
                pw.write(request);
                pw.flush();
                System.out.println("\n");

                // The reply line ends at the newline the server puts between
                // the prediction and the extracted word.
                String line = in.readLine();
                if (line == null) {
                    break; // server closed the connection
                }
                String[] s = line.split(" ");
                List<List<String>> arrlist2 = new ArrayList<>();
                for (String str : s) {
                    List<String> arrayList = new ArrayList<>();
                    arrayList.add(str);
                    arrlist2.add(arrayList);
                }
                System.out.println(arrlist2);
            }
        } catch (UnknownHostException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}