java 推荐算法
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
package com.tarzan.recommend;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
|
||||
@SpringBootApplication
|
||||
public class RecommendSystemApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(RecommendSystemApplication.class, args);
|
||||
}
|
||||
|
||||
}
|
||||
122
src/main/java/com/tarzan/recommend/Service/FileDataSource.java
Normal file
122
src/main/java/com/tarzan/recommend/Service/FileDataSource.java
Normal file
@@ -0,0 +1,122 @@
|
||||
package com.tarzan.recommend.Service;
|
||||
|
||||
import com.tarzan.recommend.dto.ItemDTO;
|
||||
import com.tarzan.recommend.dto.RelateDTO;
|
||||
import com.tarzan.recommend.dto.UserDTO;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.assertj.core.util.Lists;
|
||||
import org.springframework.util.ResourceUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
|
||||
@Data
|
||||
@Slf4j
|
||||
public class FileDataSource {
|
||||
|
||||
|
||||
public final static String folderPath="F:\\ml-100k";
|
||||
|
||||
|
||||
/**
|
||||
* 方法描述: 读取基础数据
|
||||
*
|
||||
* @param
|
||||
* @Return {@link List< RelateDTO>}
|
||||
* @throws
|
||||
* @author tarzan
|
||||
* @date 2020年07月31日 16:53:40
|
||||
*/
|
||||
public static List<RelateDTO> getData() {
|
||||
List<RelateDTO> relateList = Lists.newArrayList();
|
||||
try {
|
||||
FileInputStream out = new FileInputStream(folderPath+"\\u.data");
|
||||
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
|
||||
BufferedReader in = new BufferedReader(reader);
|
||||
String line = null;
|
||||
while ((line = in.readLine()) != null) {
|
||||
String newline = line.replaceAll("[\t]", " ");
|
||||
String[] ht = newline.split(" ");
|
||||
Integer userId = Integer.parseInt(ht[0]);
|
||||
Integer movieId = Integer.parseInt(ht[1]);
|
||||
Integer rating = Integer.parseInt(ht[2]);
|
||||
RelateDTO dto = new RelateDTO(userId, movieId, rating);
|
||||
relateList.add(dto);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
return relateList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 方法描述: 读取用户数据
|
||||
*
|
||||
* @param
|
||||
* @Return {@link List< UserDTO>}
|
||||
* @throws
|
||||
* @author tarzan
|
||||
* @date 2020年07月31日 16:54:51
|
||||
*/
|
||||
public static List<UserDTO> getUserData() {
|
||||
List<UserDTO> userList = Lists.newArrayList();
|
||||
try {
|
||||
FileInputStream out = new FileInputStream(folderPath+"\\u.user");
|
||||
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
|
||||
BufferedReader in = new BufferedReader(reader);
|
||||
String line = null;
|
||||
while ((line = in.readLine()) != null) {
|
||||
String newline = line.replaceAll("[\t]", " ");
|
||||
String[] ht = newline.split("\\|");
|
||||
Integer id = Integer.parseInt(ht[0]);
|
||||
Integer age = Integer.parseInt(ht[1]);
|
||||
String sex = ht[2];
|
||||
String profession = ht[3];
|
||||
String postcode = ht[4];
|
||||
UserDTO dto = new UserDTO(id, age, sex, profession, postcode);
|
||||
userList.add(dto);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
return userList;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 方法描述: 读取电影数据
|
||||
*
|
||||
* @param
|
||||
* @Return {@link List< ItemDTO>}
|
||||
* @throws
|
||||
* @author tarzan
|
||||
* @date 2020年07月31日 16:54:22
|
||||
*/
|
||||
public static List<ItemDTO> getItemData() {
|
||||
List<ItemDTO> itemList = Lists.newArrayList();
|
||||
try {
|
||||
FileInputStream out = new FileInputStream(folderPath+"\\u.item");
|
||||
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
|
||||
BufferedReader in = new BufferedReader(reader);
|
||||
String line = null;
|
||||
while ((line = in.readLine()) != null) {
|
||||
String newline = line.replaceAll("[\t]", " ");
|
||||
String[] ht = newline.split("\\|");
|
||||
Integer id = Integer.parseInt(ht[0]);
|
||||
String name = ht[1];
|
||||
String date = ht[2];
|
||||
String link = ht[3];
|
||||
ItemDTO dto = new ItemDTO(id, name, date, link);
|
||||
itemList.add(dto);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
log.error(e.getMessage());
|
||||
}
|
||||
return itemList;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
44
src/main/java/com/tarzan/recommend/Service/Recommend.java
Normal file
44
src/main/java/com/tarzan/recommend/Service/Recommend.java
Normal file
@@ -0,0 +1,44 @@
|
||||
package com.tarzan.recommend.Service;
|
||||
|
||||
import com.tarzan.recommend.core.CoreMath;
|
||||
import com.tarzan.recommend.dto.ItemDTO;
|
||||
import com.tarzan.recommend.dto.RelateDTO;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 推荐服务
|
||||
*
|
||||
* @author liu yapeng
|
||||
* @version 1.0
|
||||
* @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
|
||||
* @date 2020/7/31$ 16:18$
|
||||
* @since JDK1.8
|
||||
*/
|
||||
public class Recommend{
|
||||
|
||||
|
||||
/**
|
||||
* 方法描述: 猜你喜欢
|
||||
*
|
||||
* @param
|
||||
* @Return {@link List< ItemDTO>}
|
||||
* @throws
|
||||
* @author tarzan
|
||||
* @date 2020年07月31日 17:28:06
|
||||
*/
|
||||
public static List<ItemDTO> guessYouLike(){
|
||||
CoreMath coreMath = new CoreMath();
|
||||
List<RelateDTO> data= FileDataSource.getData();
|
||||
List<Integer> recommendations = coreMath.recommend(2, data);
|
||||
List<ItemDTO> itemList= FileDataSource.getItemData().stream().filter(e->recommendations.contains(e.getId())).collect(Collectors.toList());
|
||||
return itemList;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
List<ItemDTO> itemList= Recommend.guessYouLike();
|
||||
System.out.println("------猜你可能喜欢---------------下列电影="+itemList.stream().map(e->e.getName()).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
}
|
||||
109
src/main/java/com/tarzan/recommend/core/CoreMath.java
Normal file
109
src/main/java/com/tarzan/recommend/core/CoreMath.java
Normal file
@@ -0,0 +1,109 @@
|
||||
package com.tarzan.recommend.core;
|
||||
|
||||
import com.tarzan.recommend.dto.RelateDTO;
|
||||
import org.assertj.core.util.Lists;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
/**
|
||||
* 核心算法
|
||||
*
|
||||
* @author tarzan
|
||||
* @version 1.0
|
||||
* @company 洛阳图联科技有限公司
|
||||
* @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
|
||||
* @date 2020/7/31$ 15:21$
|
||||
* @since JDK1.8
|
||||
*/
|
||||
public class CoreMath {
|
||||
|
||||
public List<Integer> recommend(Integer userId, List<RelateDTO> list) {
|
||||
//找到最近邻用户id
|
||||
Map<Double, Integer> distances = computeNearestNeighbor(userId, list);
|
||||
Integer nearest = distances.values().iterator().next();
|
||||
|
||||
Map<Integer, List<RelateDTO>> userMap=list.stream().collect(Collectors.groupingBy(RelateDTO::getUseId));
|
||||
|
||||
//最近邻用户看过电影列表
|
||||
List<Integer> neighborItemList = userMap.get(nearest).stream().map(e->e.getModuleId()).collect(Collectors.toList());
|
||||
//指定用户看过电影列表
|
||||
List<Integer> userItemList = userMap.get(userId).stream().map(e->e.getModuleId()).collect(Collectors.toList());;
|
||||
|
||||
//找到最近邻看过,但是该用户没看过的电影,计算推荐,放入推荐列表
|
||||
List<Integer> recommendList = new ArrayList<>();
|
||||
for (Integer item : neighborItemList) {
|
||||
if (!userItemList.contains(item)) {
|
||||
recommendList.add(item);
|
||||
}
|
||||
}
|
||||
Collections.sort(recommendList);
|
||||
return recommendList;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 在给定userId的情况下,计算其他用户和它的相关系数并排序
|
||||
* @param userId
|
||||
* @param list
|
||||
* @return
|
||||
*/
|
||||
private Map<Double, Integer> computeNearestNeighbor(Integer userId, List<RelateDTO> list) {
|
||||
Map<Integer, List<RelateDTO>> userMap=list.stream().collect(Collectors.groupingBy(RelateDTO::getUseId));
|
||||
Map<Double, Integer> distances = new TreeMap<>();
|
||||
userMap.forEach((k,v)->{
|
||||
if(k!=userId){
|
||||
double distance = pearson_dis(v,userMap.get(userId));
|
||||
distances.put(distance, k);
|
||||
}
|
||||
});
|
||||
return distances;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 计算两个序列间的相关系数
|
||||
*
|
||||
* @param xList
|
||||
* @param yList
|
||||
* @return
|
||||
*/
|
||||
private double pearson_dis(List<RelateDTO> xList, List<RelateDTO> yList) {
|
||||
List<Integer> xs= Lists.newArrayList();
|
||||
List<Integer> ys= Lists.newArrayList();
|
||||
xList.forEach(x->{
|
||||
yList.forEach(y->{
|
||||
if(x.getModuleId()==y.getModuleId()){
|
||||
xs.add(x.getIndex());
|
||||
ys.add(y.getIndex());
|
||||
}
|
||||
});
|
||||
});
|
||||
return getRelate(xs,ys);
|
||||
}
|
||||
|
||||
/**
|
||||
* 方法描述: 皮尔森(pearson)相关系数计算
|
||||
*
|
||||
* @param xs
|
||||
* @param ys
|
||||
* @Return {@link Double}
|
||||
* @throws
|
||||
* @author tarzan
|
||||
* @date 2020年07月31日 17:03:20
|
||||
*/
|
||||
public static Double getRelate(List<Integer> xs, List<Integer> ys){
|
||||
int n=xs.size();
|
||||
double Ex= xs.stream().mapToDouble(x->x).sum();
|
||||
double Ey=ys.stream().mapToDouble(y->y).sum();
|
||||
double Ex2=xs.stream().mapToDouble(x->Math.pow(x,2)).sum();
|
||||
double Ey2=ys.stream().mapToDouble(y->Math.pow(y,2)).sum();
|
||||
double Exy= IntStream.range(0,n).mapToDouble(i->xs.get(i)*ys.get(i)).sum();
|
||||
double numerator=Exy-Ex*Ey/n;
|
||||
double denominator=Math.sqrt((Ex2-Math.pow(Ex,2)/n)*(Ey2-Math.pow(Ey,2)/n));
|
||||
if (denominator==0) return 0.0;
|
||||
return numerator/denominator;
|
||||
}
|
||||
|
||||
}
|
||||
26
src/main/java/com/tarzan/recommend/dto/ItemDTO.java
Normal file
26
src/main/java/com/tarzan/recommend/dto/ItemDTO.java
Normal file
@@ -0,0 +1,26 @@
|
||||
package com.tarzan.recommend.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 业务项
|
||||
*
|
||||
* @author liu yapeng
|
||||
* @version 1.0
|
||||
* @company 洛阳图联科技有限公司
|
||||
* @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
|
||||
* @date 2020/7/31$ 15:02$
|
||||
* @since JDK1.8
|
||||
*/
|
||||
@Data
|
||||
@NoArgsConstructor
|
||||
@AllArgsConstructor
|
||||
public class ItemDTO {
|
||||
private Integer id;
|
||||
private String name;
|
||||
private String date;
|
||||
private String link;
|
||||
|
||||
}
|
||||
29
src/main/java/com/tarzan/recommend/dto/RelateDTO.java
Normal file
29
src/main/java/com/tarzan/recommend/dto/RelateDTO.java
Normal file
@@ -0,0 +1,29 @@
|
||||
package com.tarzan.recommend.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 关系数据
|
||||
*
|
||||
* @author liu yapeng
|
||||
* @version 1.0
|
||||
* @company 洛阳图联科技有限公司
|
||||
* @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
|
||||
* @date 2020/7/31$ 14:51$
|
||||
* @since JDK1.8
|
||||
*/
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class RelateDTO {
|
||||
//用户id
|
||||
private Integer useId;
|
||||
//业务id
|
||||
private Integer moduleId;
|
||||
//指数
|
||||
private Integer index;
|
||||
|
||||
|
||||
}
|
||||
32
src/main/java/com/tarzan/recommend/dto/UserDTO.java
Normal file
32
src/main/java/com/tarzan/recommend/dto/UserDTO.java
Normal file
@@ -0,0 +1,32 @@
|
||||
package com.tarzan.recommend.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* 用户对象
|
||||
*
|
||||
* @author liu yapeng
|
||||
* @version 1.0
|
||||
* @company 洛阳图联科技有限公司
|
||||
* @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
|
||||
* @date 2020/7/31$ 14:55$
|
||||
* @since JDK1.8
|
||||
*/
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class UserDTO {
|
||||
//主键
|
||||
private Integer id;
|
||||
//年纪
|
||||
private Integer age;
|
||||
//性别
|
||||
private String sex;
|
||||
//职业
|
||||
private String profession;
|
||||
//邮编
|
||||
private String postcode;
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user