1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.nnsoft.sameas4j;
23
24 import java.io.ByteArrayOutputStream;
25 import java.net.URI;
26 import java.util.BitSet;
27
28 import com.google.gson.JsonArray;
29 import com.google.gson.JsonElement;
30 import com.google.gson.JsonParseException;
31
32
33
34
35
36 abstract class AbstractEquivalenceDeserializer {
37
38
39
40
41 private static final String URI = "uri";
42
43
44
45
46 private static final String DUPLICATES = "duplicates";
47
48
49
50
51 private static final String EXCEPTION_MESSAGE = "URI '%s' seems to be not well-formed";
52
53
54
55
56 private static final String UTF_8_ENCODING = "UTF-8";
57
58
59
60
61 private static final BitSet UNRESERVED_CHARS = new BitSet(256);
62
63
64
65
66 private static final byte URL_ESCAPE_CHAR = '%';
67
68 static {
69 for (byte b = 'A'; b <= 'Z'; b++) {
70 UNRESERVED_CHARS.set(b);
71 }
72 for (byte b = 'a'; b <= 'z'; b++) {
73 UNRESERVED_CHARS.set(b);
74 }
75 for (byte b = '0'; b <= '9'; b++) {
76 UNRESERVED_CHARS.set(b);
77 }
78
79
80 UNRESERVED_CHARS.set('$');
81 UNRESERVED_CHARS.set('#');
82 UNRESERVED_CHARS.set('&');
83 UNRESERVED_CHARS.set('+');
84 UNRESERVED_CHARS.set(',');
85 UNRESERVED_CHARS.set(';');
86 UNRESERVED_CHARS.set('=');
87 UNRESERVED_CHARS.set('?');
88 UNRESERVED_CHARS.set('@');
89 UNRESERVED_CHARS.set('/');
90 UNRESERVED_CHARS.set(':');
91 UNRESERVED_CHARS.set('-');
92 UNRESERVED_CHARS.set('.');
93 UNRESERVED_CHARS.set('_');
94 UNRESERVED_CHARS.set('~');
95 }
96
97
98
99
100
101
102
103 public static String urlEncode(final String text) throws Exception {
104 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
105
106 for (int b : toUTF8Bytes(text)) {
107 if (b < 0) {
108 b = 256 + b;
109 }
110
111 if (UNRESERVED_CHARS.get(b)) {
112 buffer.write(b);
113 } else {
114 buffer.write(URL_ESCAPE_CHAR);
115 char hex1 = Character.toUpperCase(Character.forDigit(
116 (b >> 4) & 0xF, 16));
117 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF,
118 16));
119 buffer.write(hex1);
120 buffer.write(hex2);
121 }
122 }
123
124 return new String(buffer.toByteArray());
125 }
126
127
128
129
130
131
132
133 private static byte[] toUTF8Bytes(final String text) throws Exception {
134 return text.getBytes(UTF_8_ENCODING);
135 }
136
137
138
139
140
141
142
143
144 public Equivalence getEquivalence(JsonElement json) {
145 Equivalence equivalence;
146 String uriString = json.getAsJsonObject().getAsJsonPrimitive(URI).getAsString();
147 URI uri;
148 try {
149 uri = new URI(urlEncode(uriString));
150 } catch (Exception e) {
151 throw new JsonParseException(String.format(EXCEPTION_MESSAGE, uriString));
152 }
153 equivalence = new Equivalence(uri);
154 JsonArray duplicates = json.getAsJsonObject().getAsJsonArray(DUPLICATES);
155 for (int i = 0; i < duplicates.size(); i++) {
156 try {
157 equivalence.addDuplicate(new URI(urlEncode(duplicates.get(i).getAsString())));
158 } catch (Exception e) {
159
160 continue;
161 }
162 }
163 return equivalence;
164 }
165
166 public static void main( String[] args ) throws Exception
167 {
168 System.err.println(urlEncode( "http://it.bestshopping.com/prezzi/Rome.sku=9780751339031|.html" ));
169 }
170
171 }