Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(module): add emoji-json-generator module #175

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions emoji-table-generator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,25 @@ It is used for the table in the top level README :)

Run with:

```
```bash
mvn exec:java -Dexec.mainClass="com.vdurmont.emoji.TableGenerator"
```
```

```bash
mvn exec:java -Dexec.mainClass="com.vdurmont.emoji.JsonGenerator" [-Dexec.args="proxy=127.0.0.1 port=1080 \
path=/path/to/already/downloaded/emoji.html save_url=/path/to/emoji.json \
url=https://unicode.org/emoji/charts/full-emoji-list.html \
emoji_path=/path/to/already/emoji.json \
emoji_i18n_path=/path/to/i18n_description/emoji_i18n.json
"]
```

**note:**
- []: marks the enclosed arguments as optional
- proxy: proxy server address, e.g. 127.0.0.1
- port: proxy server port, e.g. 1080
- path: local path of an already downloaded copy of https://unicode.org/emoji/charts/full-emoji-list.html; when given, it takes priority over `url`, `proxy` and `port`
- save_url: path of the generated emoji JSON file, default is `emoji.json` inside `System.getProperty("java.io.tmpdir")`
- url: emoji list URL, default is https://unicode.org/emoji/charts/full-emoji-list.html
- emoji_path: path of an existing emoji JSON file, e.g. src/main/resources/emojis.json; its entries override the ones parsed from https://unicode.org/emoji/charts/full-emoji-list.html, matched by `emojiChar`. The content looks like https://github.com/vdurmont/emoji-java/blob/master/src/main/resources/emojis.json
- emoji_i18n_path: path of a JSON file with translated `description` values, for when you want i18n emoji descriptions. The content looks like https://github.com/anjia0532/emoji-java/blob/master/src/main/resources/emojis.i18n.json
17 changes: 17 additions & 0 deletions emoji-table-generator/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@
<groupId>com.vdurmont</groupId>
<artifactId>emoji-table-generator</artifactId>
<version>1.0.0-SNAPSHOT</version>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>8</source>
<target>8</target>
</configuration>
</plugin>
</plugins>
</build>
<packaging>jar</packaging>

<name>emoji-table-generator</name>
Expand All @@ -20,5 +32,10 @@
<artifactId>emoji-java</artifactId>
<version>5.1.1</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package com.vdurmont.emoji;

import org.json.JSONObject;

import java.util.List;
import java.util.Objects;

/**
 * Plain data holder for one emoji entry, using the same property names as the generated JSON
 * ({@code description}, {@code emojiChar}, {@code emoji}, {@code aliases}, {@code tags}).
 *
 * @author AnJia
 * @since 2020-12-14 19:37
 */
public class EmojiDto {

    /** Human readable description of the emoji. */
    private String description;
    /** The emoji itself, as a string of characters. */
    private String emojiChar;
    /** Value of the {@code emoji} property. */
    private String emoji;
    /** Alias names of the emoji. */
    private List<String> aliases;
    /** Tags attached to the emoji. */
    private List<String> tags;

    /** @return the description */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description to store */
    public void setDescription(final String description) {
        this.description = description;
    }

    /** @return the emoji characters */
    public String getEmojiChar() {
        return this.emojiChar;
    }

    /** @param emojiChar the emoji characters to store */
    public void setEmojiChar(final String emojiChar) {
        this.emojiChar = emojiChar;
    }

    /** @return the value of the {@code emoji} property */
    public String getEmoji() {
        return this.emoji;
    }

    /** @param emoji the value to store in the {@code emoji} property */
    public void setEmoji(final String emoji) {
        this.emoji = emoji;
    }

    /** @return the alias list */
    public List<String> getAliases() {
        return this.aliases;
    }

    /** @param aliases the alias list to store */
    public void setAliases(final List<String> aliases) {
        this.aliases = aliases;
    }

    /** @return the tag list */
    public List<String> getTags() {
        return this.tags;
    }

    /** @param tags the tag list to store */
    public void setTags(final List<String> tags) {
        this.tags = tags;
    }

    /**
     * Renders this bean as JSON text by handing it to the {@link JSONObject} bean constructor.
     *
     * @return the JSON representation of this object
     */
    @Override
    public String toString() {
        final JSONObject json = new JSONObject(this);
        return json.toString();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
package com.vdurmont.emoji;

import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;

/**
* This app generates the emoji JSON from https://unicode.org/emoji/charts/full-emoji-list.html ;)
* <p>
* Run with:
* mvn exec:java -Dexec.mainClass="com.vdurmont.emoji.JsonGenerator"
*/
public class JsonGenerator {
// Keys of the supported "key=value" command-line arguments.
// Proxy host used for the remote download (only applied together with the port).
private static final String ARGS_NAME_PROXY_HOST = "proxy";
// Proxy port used for the remote download; parsed as an integer.
private static final String ARGS_NAME_PROXY_PORT = "port";
// Local copy of the emoji chart; when set, the chart is read from this file instead of the network.
private static final String ARGS_NAME_OFFLINE_PATH = "path";
// URL of the emoji chart; falls back to EMOJI_REMOTE_ONLINE_URL.
private static final String ARGS_NAME_ONLINE_URL = "url";
// Target file of the generated JSON (note: the argument is spelled "save_url" but holds a file path).
private static final String ARGS_NAME_SAVE_PATH = "save_url";
// Existing emoji JSON file whose entries are reused, keyed by "emojiChar".
private static final String ARGS_NAME_EMOJI_JSON_PATH = "emoji_path";
// JSON file providing replacement descriptions, keyed by "emojiChar".
private static final String ARGS_NAME_EMOJI_I18N_JSON_PATH = "emoji_i18n_path";
// Separator between key and value in a command-line argument.
private static final String STRING_SYMBOL_EQUAL = "=";
// Default location of the Unicode full emoji list.
private static final String EMOJI_REMOTE_ONLINE_URL = "https://unicode.org/emoji/charts/full-emoji-list.html";
// Parsed command-line arguments; populated at the start of main and read by the helper methods.
private static Map<String, String> ARGS_MAP;

/**
 * Generates the emoji JSON file.
 * <p>
 * Parses the {@code key=value} arguments, loads the emoji chart (from the local file or the remote URL),
 * converts every emoji row of the chart into a JSON object and writes the resulting array to the save path
 * ({@code emoji.json} in the temp directory unless {@code save_url} is given).
 * <p>
 * Rows whose emoji is already present in the {@code emoji_path} file reuse that entry; only its
 * {@code description} and {@code emoji} properties are refreshed. Descriptions found in the
 * {@code emoji_i18n_path} file take precedence over all other descriptions.
 *
 * @param args command-line arguments in {@code key=value} form
 * @throws IOException if the chart cannot be loaded or the output file cannot be written
 */
public static void main(String[] args) throws IOException {
    ARGS_MAP = argsParser(args);
    Document root = getDocument();

    Map<String, JSONObject> emojiMap = getJsonMapFromEmojiJson(ARGS_MAP.get(ARGS_NAME_EMOJI_JSON_PATH));
    Map<String, String> emojiI18nMap = getI18nMapFromEmojiI18nJson(ARGS_MAP.get(ARGS_NAME_EMOJI_I18N_JSON_PATH));

    JSONArray emojis = new JSONArray();
    // Names of the most recent section / sub-section header rows; they become the tags of the rows below them.
    String aliasBigHead = null;
    String aliasMediumHead = null;
    for (Element trTag : root.getElementsByTag("tr")) {
        Element bighead = trTag.select("th.bighead>a").first();
        if (bighead != null) {
            aliasBigHead = bighead.attr("name");
            continue;
        }
        Element mediumhead = trTag.select("th.mediumhead>a").first();
        if (mediumhead != null) {
            aliasMediumHead = mediumhead.attr("name");
            continue;
        }

        Elements tdTags = trTag.children();
        // An emoji row needs at least the code cell (index 1) and the character cell (index 2);
        // anything shorter is skipped instead of failing with an index error.
        if (tdTags.size() < 3 || !tdTags.get(1).hasClass("code")) {
            continue;
        }

        // Drop symbol characters from the name cell, then trim the blank a removed leading symbol leaves behind.
        String desc = tdTags.last().text()
                .replaceAll("[^\\p{L}\\p{M}\\p{N}\\p{P}\\p{Z}\\p{Cf}\\p{Cs}\\p{Sc}\\s]", "")
                .trim();

        String emojiChar = tdTags.get(2).text();
        // Keycap sequences are stored without the variation selector that sits at index 1.
        if (emojiChar.length() > 1 && tdTags.get(1).text().endsWith("U+FE0F U+20E3")) {
            emojiChar = new StringBuilder(emojiChar).deleteCharAt(1).toString();
        }

        JSONObject emoji = emojiMap.get(emojiChar);
        if (emoji == null) {
            List<String> tags = new ArrayList<>(2);
            if (aliasBigHead != null) {
                tags.add(aliasBigHead);
            }
            if (aliasMediumHead != null) {
                tags.add(aliasMediumHead);
            }
            emoji = new JSONObject();
            emoji.put("emojiChar", emojiChar);
            emoji.put("emoji", convertEmoji2Unicode(emojiChar));
            emoji.put("description", emojiI18nMap.getOrDefault(emojiChar, desc));
            // "aliases" is an array in the emoji JSON schema, so wrap the single generated alias in a list.
            emoji.put("aliases", Collections.singletonList(desc.replace(" ", "_")));
            emoji.put("tags", tags);
        } else {
            // Fall back to the chart description when the reused entry has none.
            emoji.put("description", emojiI18nMap.getOrDefault(emojiChar, emoji.optString("description", desc)));
            emoji.put("emoji", convertEmoji2Unicode(emojiChar));
        }
        emojis.put(emoji);
    }

    // convertEmoji2Unicode marks each code unit with the "^^u" placeholder; turn it into a real
    // JSON unicode escape here. A literal replace is used so no other character of the output is touched.
    String emojiJson = emojis.toString(4).replace("^^u", "\\u");

    File emojiFile = new File(ARGS_MAP.getOrDefault(ARGS_NAME_SAVE_PATH, System.getProperty("java.io.tmpdir")
            + File.separator + "emoji.json"));
    System.out.println("save to: " + emojiFile.getAbsolutePath());
    Files.write(emojiFile.toPath(), Collections.singleton(emojiJson), StandardCharsets.UTF_8);
}

/**
 * Spells out every UTF-16 code unit of the given text as the placeholder {@code ^^u} followed by
 * exactly four upper-case hexadecimal digits.
 *
 * @param emoji emoji characters to convert
 * @return the concatenated placeholder sequences, empty for an empty input
 */
private static String convertEmoji2Unicode(String emoji) {
    StringBuilder escaped = new StringBuilder();
    for (int i = 0; i < emoji.length(); i++) {
        String hex = Integer.toHexString(emoji.charAt(i)).toUpperCase();
        escaped.append("^^u");
        // Left-pad with zeros so each code unit always takes four digits.
        for (int pad = hex.length(); pad < 4; pad++) {
            escaped.append('0');
        }
        escaped.append(hex);
    }
    return escaped.toString();
}

/**
 * Loads the emoji chart as a jsoup document.
 *
 * @return the document parsed from the local file named by the {@code path} argument when that argument
 *         is not blank, otherwise the document fetched through {@link #getConnection()}
 * @throws IOException if the file cannot be read or the remote request fails
 */
private static Document getDocument() throws IOException {
    String offlinePath = ARGS_MAP.get(ARGS_NAME_OFFLINE_PATH);
    if (!isBlank(offlinePath)) {
        File localCopy = new File(offlinePath);
        return Jsoup.parse(localCopy, "utf-8", "https://unicode.org/");
    }
    return getConnection().get();
}

/**
 * Builds the jsoup connection for the emoji chart.
 * <p>
 * The target is the {@code url} argument or, if absent, the default chart URL. A proxy is configured only
 * when both the {@code proxy} and {@code port} arguments are non-blank.
 *
 * @return the configured, not yet executed connection
 */
private static Connection getConnection() {
    String chartUrl = ARGS_MAP.getOrDefault(ARGS_NAME_ONLINE_URL, EMOJI_REMOTE_ONLINE_URL);
    Connection connection = Jsoup.connect(chartUrl).maxBodySize(Integer.MAX_VALUE);

    String proxyHost = ARGS_MAP.get(ARGS_NAME_PROXY_HOST);
    String proxyPort = ARGS_MAP.get(ARGS_NAME_PROXY_PORT);
    if (isBlank(proxyHost) || isBlank(proxyPort)) {
        return connection;
    }
    connection.proxy(proxyHost, Integer.parseInt(proxyPort));
    return connection;
}

/**
 * Tells whether a string carries no visible content, in the spirit of commons-lang3
 * {@code StringUtils.isBlank}.
 *
 * @param str string to check, may be {@code null}
 * @return {@code true} if {@code str} is {@code null} or empty after {@link String#trim()},
 *         {@code false} otherwise
 */
private static boolean isBlank(String str) {
    if (str == null) {
        return true;
    }
    return str.trim().isEmpty();
}

/**
 * Parses {@code key=value} command-line arguments into a map.
 * <p>
 * Each argument is split at its first {@code =}. Arguments without a separator, or with an empty key,
 * are ignored. When a key occurs more than once the last value wins.
 * <p>
 * The result is built in a local map; this method does not touch any static state, so what ends up in
 * the shared argument map is decided by the caller alone.
 *
 * @param args command args, may be {@code null} or empty
 * @return the parsed arguments; an empty map when there is nothing to parse
 */
private static Map<String, String> argsParser(String[] args) {
    if (args == null || args.length == 0) {
        return Collections.emptyMap();
    }
    Map<String, String> parsed = new HashMap<>(args.length);
    for (String arg : args) {
        if (arg == null) {
            continue;
        }
        int index = arg.indexOf(STRING_SYMBOL_EQUAL);
        // index == -1: no separator; index == 0: empty key.
        if (index <= 0) {
            continue;
        }
        parsed.put(arg.substring(0, index), arg.substring(index + 1));
    }
    return parsed;
}

/**
 * Reads an emoji JSON array from a file and indexes its objects by their {@code emojiChar} value.
 * <p>
 * This is a best-effort load: any failure (unreadable file, malformed JSON, unexpected element) prints
 * the stack trace and yields an empty map.
 *
 * @param emojiPath path of the JSON file, may be {@code null} or empty
 * @return the entries keyed by {@code emojiChar}; an empty map when no path is given or loading fails
 */
private static Map<String, JSONObject> getJsonMapFromEmojiJson(String emojiPath) {
    if (emojiPath == null || emojiPath.isEmpty()) {
        return Collections.emptyMap();
    }
    try {
        String content = String.join("", Files.readAllLines(new File(emojiPath).toPath()));
        JSONArray entries = new JSONArray(content);
        Map<String, JSONObject> byEmojiChar = new HashMap<>(entries.length());
        for (Object element : entries) {
            JSONObject entry = (JSONObject) element;
            byEmojiChar.put(entry.getString("emojiChar"), entry);
        }
        return byEmojiChar;
    } catch (Exception ex) {
        ex.printStackTrace();
        return Collections.emptyMap();
    }
}

/**
 * Reads a JSON array of {@code emojiChar}/{@code description} objects from a file and returns the
 * descriptions keyed by {@code emojiChar}.
 * <p>
 * This is a best-effort load: any failure (unreadable file, malformed JSON, missing property) prints
 * the stack trace and yields an empty map.
 *
 * @param emojiI18nPath path of the JSON file, may be {@code null} or empty
 * @return the descriptions keyed by {@code emojiChar}; an empty map when no path is given or loading fails
 */
private static Map<String, String> getI18nMapFromEmojiI18nJson(String emojiI18nPath) {
    if (emojiI18nPath == null || emojiI18nPath.isEmpty()) {
        return Collections.emptyMap();
    }
    try {
        String content = String.join("", Files.readAllLines(new File(emojiI18nPath).toPath()));
        JSONArray entries = new JSONArray(content);
        Map<String, String> descriptions = new HashMap<>(entries.length());
        for (Object element : entries) {
            JSONObject entry = (JSONObject) element;
            descriptions.put(entry.getString("emojiChar"), entry.getString("description"));
        }
        return descriptions;
    } catch (Exception ex) {
        ex.printStackTrace();
        return Collections.emptyMap();
    }
}
}
6 changes: 6 additions & 0 deletions emoji-table-generator/src/main/resources/emojis.i18n.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[
{
"emojiChar": "👀",
"description": "两只眼睛"
}
]
Loading